diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -3,66549 +3,98672 @@ "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, - "global_step": 9503, + "global_step": 14092, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, - "grad_norm": 36.36130527170318, - "learning_rate": 3.4965034965034967e-08, - "loss": 1.8874, + "grad_norm": 0.8587175627969085, + "learning_rate": 2.364066193853428e-08, + "loss": 0.4041, "step": 1 }, { "epoch": 0.0, - "grad_norm": 44.35509081795796, - "learning_rate": 6.993006993006993e-08, - "loss": 1.9623, + "grad_norm": 5.583009923102745, + "learning_rate": 4.728132387706856e-08, + "loss": 0.9724, "step": 2 }, { "epoch": 0.0, - "grad_norm": 113.15059681611625, - "learning_rate": 1.048951048951049e-07, - "loss": 1.8315, + "grad_norm": 6.076889550887127, + "learning_rate": 7.092198581560284e-08, + "loss": 0.9748, "step": 3 }, { "epoch": 0.0, - "grad_norm": 26.480686239957798, - "learning_rate": 1.3986013986013987e-07, - "loss": 1.8153, + "grad_norm": 4.958300688747302, + "learning_rate": 9.456264775413712e-08, + "loss": 0.8295, "step": 4 }, { "epoch": 0.0, - "grad_norm": 73.55538638754199, - "learning_rate": 1.7482517482517484e-07, - "loss": 1.8261, + "grad_norm": 6.079811480892564, + "learning_rate": 1.182033096926714e-07, + "loss": 1.0067, "step": 5 }, { "epoch": 0.0, - "grad_norm": 44.47995785747746, - "learning_rate": 2.097902097902098e-07, - "loss": 1.7684, + "grad_norm": 6.051209274504425, + "learning_rate": 1.4184397163120568e-07, + "loss": 0.9317, "step": 6 }, { "epoch": 0.0, - "grad_norm": 41.19958226318581, - "learning_rate": 2.447552447552448e-07, - "loss": 1.7842, + "grad_norm": 5.92102203332061, + "learning_rate": 1.6548463356973995e-07, + "loss": 1.0185, "step": 7 }, { "epoch": 0.0, - "grad_norm": 162.3686148569218, - "learning_rate": 2.7972027972027973e-07, - "loss": 1.8389, + "grad_norm": 6.498622705489556, + "learning_rate": 1.8912529550827425e-07, + "loss": 0.9687, "step": 8 }, { "epoch": 0.0, - "grad_norm": 37.77584116684587, - "learning_rate": 3.1468531468531473e-07, - "loss": 1.739, + "grad_norm": 5.814821658435063, + "learning_rate": 2.1276595744680852e-07, + "loss": 0.9695, "step": 9 }, { "epoch": 0.0, - "grad_norm": 33.567385322447045, - "learning_rate": 3.496503496503497e-07, - "loss": 1.823, + "grad_norm": 6.446124476869084, + "learning_rate": 2.364066193853428e-07, + "loss": 0.8598, "step": 10 }, { "epoch": 0.0, - "grad_norm": 79.25205422236616, - "learning_rate": 3.846153846153847e-07, - "loss": 1.8705, + "grad_norm": 5.516335803697005, + "learning_rate": 2.6004728132387706e-07, + "loss": 0.9629, "step": 11 }, { "epoch": 0.0, - "grad_norm": 29.01030979777457, - "learning_rate": 4.195804195804196e-07, - "loss": 1.8595, + "grad_norm": 5.09784018677111, + "learning_rate": 2.8368794326241136e-07, + "loss": 0.9552, "step": 12 }, { "epoch": 0.0, - "grad_norm": 32.29577345941253, - "learning_rate": 4.5454545454545457e-07, - "loss": 1.7776, + "grad_norm": 5.029348032951575, + "learning_rate": 3.0732860520094566e-07, + "loss": 0.9829, "step": 13 }, { "epoch": 0.0, - "grad_norm": 75.68576026951285, - "learning_rate": 4.895104895104896e-07, - "loss": 1.7253, + "grad_norm": 5.197871010360706, + "learning_rate": 3.309692671394799e-07, + "loss": 0.961, "step": 14 }, { "epoch": 0.0, - "grad_norm": 44.31708263717978, - "learning_rate": 5.244755244755246e-07, - "loss": 1.6779, + "grad_norm": 7.444068851120301, + "learning_rate": 3.5460992907801425e-07, + "loss": 0.8559, "step": 15 }, { "epoch": 0.0, - "grad_norm": 219.5878152798679, - "learning_rate": 5.594405594405595e-07, - "loss": 1.702, + "grad_norm": 5.263953889786298, + "learning_rate": 3.782505910165485e-07, + "loss": 0.8968, "step": 16 }, { "epoch": 0.0, - "grad_norm": 33.33211308321619, - "learning_rate": 5.944055944055945e-07, - "loss": 1.5494, + "grad_norm": 5.9308618238654365, + "learning_rate": 4.018912529550828e-07, + "loss": 1.0012, "step": 17 }, { "epoch": 0.0, - "grad_norm": 28.77943791137145, - "learning_rate": 6.293706293706295e-07, - "loss": 1.5089, + "grad_norm": 0.7103527167811602, + "learning_rate": 4.2553191489361704e-07, + "loss": 0.4147, "step": 18 }, { "epoch": 0.0, - "grad_norm": 60.96425095593503, - "learning_rate": 6.643356643356644e-07, - "loss": 1.4609, + "grad_norm": 5.4601398805553085, + "learning_rate": 4.4917257683215134e-07, + "loss": 1.0205, "step": 19 }, { "epoch": 0.0, - "grad_norm": 21.119045219567745, - "learning_rate": 6.993006993006994e-07, - "loss": 1.4632, + "grad_norm": 5.842036311266042, + "learning_rate": 4.728132387706856e-07, + "loss": 0.9705, "step": 20 }, { "epoch": 0.0, - "grad_norm": 30.087479668348955, - "learning_rate": 7.342657342657343e-07, - "loss": 1.5663, + "grad_norm": 8.941207999783432, + "learning_rate": 4.964539007092199e-07, + "loss": 0.9253, "step": 21 }, { "epoch": 0.0, - "grad_norm": 22.134522059806123, - "learning_rate": 7.692307692307694e-07, - "loss": 1.4837, + "grad_norm": 5.199456691240805, + "learning_rate": 5.200945626477541e-07, + "loss": 0.9801, "step": 22 }, { "epoch": 0.0, - "grad_norm": 29.13387995825167, - "learning_rate": 8.041958041958043e-07, - "loss": 1.4038, + "grad_norm": 7.912397961401164, + "learning_rate": 5.437352245862885e-07, + "loss": 0.9274, "step": 23 }, { "epoch": 0.0, - "grad_norm": 8.488083114385395, - "learning_rate": 8.391608391608393e-07, - "loss": 1.2151, + "grad_norm": 4.640194361056253, + "learning_rate": 5.673758865248227e-07, + "loss": 0.9181, "step": 24 }, { "epoch": 0.0, - "grad_norm": 10.410898426509407, - "learning_rate": 8.741258741258741e-07, - "loss": 1.2384, + "grad_norm": 5.096234565296452, + "learning_rate": 5.91016548463357e-07, + "loss": 0.9708, "step": 25 }, { "epoch": 0.0, - "grad_norm": 7.196734276367997, - "learning_rate": 9.090909090909091e-07, - "loss": 1.2946, + "grad_norm": 4.865217896053551, + "learning_rate": 6.146572104018913e-07, + "loss": 0.9088, "step": 26 }, { "epoch": 0.0, - "grad_norm": 5.855727873849648, - "learning_rate": 9.44055944055944e-07, - "loss": 1.3565, + "grad_norm": 4.2449427005788785, + "learning_rate": 6.382978723404255e-07, + "loss": 0.9388, "step": 27 }, { "epoch": 0.0, - "grad_norm": 9.775814502806083, - "learning_rate": 9.790209790209791e-07, - "loss": 1.2246, + "grad_norm": 4.072191074509652, + "learning_rate": 6.619385342789598e-07, + "loss": 0.9533, "step": 28 }, { "epoch": 0.0, - "grad_norm": 4.850328552766668, - "learning_rate": 1.013986013986014e-06, - "loss": 0.8368, + "grad_norm": 4.431695321161389, + "learning_rate": 6.855791962174942e-07, + "loss": 0.89, "step": 29 }, { "epoch": 0.0, - "grad_norm": 5.46226966648362, - "learning_rate": 1.0489510489510491e-06, - "loss": 0.8374, + "grad_norm": 3.5165089283738817, + "learning_rate": 7.092198581560285e-07, + "loss": 0.7818, "step": 30 }, { "epoch": 0.0, - "grad_norm": 4.878572158964688, - "learning_rate": 1.083916083916084e-06, - "loss": 1.1998, + "grad_norm": 4.092919331252151, + "learning_rate": 7.328605200945627e-07, + "loss": 0.8877, "step": 31 }, { "epoch": 0.0, - "grad_norm": 7.823282919023532, - "learning_rate": 1.118881118881119e-06, - "loss": 1.3195, + "grad_norm": 4.4152596066655585, + "learning_rate": 7.56501182033097e-07, + "loss": 0.8657, "step": 32 }, { "epoch": 0.0, - "grad_norm": 10.031172311822704, - "learning_rate": 1.153846153846154e-06, - "loss": 1.1547, + "grad_norm": 0.7185456183956519, + "learning_rate": 7.801418439716313e-07, + "loss": 0.4143, "step": 33 }, { "epoch": 0.0, - "grad_norm": 5.192629192579879, - "learning_rate": 1.188811188811189e-06, - "loss": 1.1146, + "grad_norm": 3.2372119629501883, + "learning_rate": 8.037825059101656e-07, + "loss": 0.8192, "step": 34 }, { "epoch": 0.0, - "grad_norm": 7.2298322921309905, - "learning_rate": 1.2237762237762238e-06, - "loss": 1.0787, + "grad_norm": 4.028327891852994, + "learning_rate": 8.274231678486998e-07, + "loss": 0.8476, "step": 35 }, { "epoch": 0.0, - "grad_norm": 6.136735379037229, - "learning_rate": 1.258741258741259e-06, - "loss": 1.1542, + "grad_norm": 3.5992495168327494, + "learning_rate": 8.510638297872341e-07, + "loss": 0.7375, "step": 36 }, { "epoch": 0.0, - "grad_norm": 5.0826929997956345, - "learning_rate": 1.2937062937062938e-06, - "loss": 1.1515, + "grad_norm": 3.6656303580340697, + "learning_rate": 8.747044917257684e-07, + "loss": 0.91, "step": 37 }, { "epoch": 0.0, - "grad_norm": 9.642838223265395, - "learning_rate": 1.3286713286713287e-06, - "loss": 1.0195, + "grad_norm": 3.565875190750197, + "learning_rate": 8.983451536643027e-07, + "loss": 0.8641, "step": 38 }, { "epoch": 0.0, - "grad_norm": 21.361559216190493, - "learning_rate": 1.3636363636363636e-06, - "loss": 1.099, + "grad_norm": 3.2659699093645176, + "learning_rate": 9.219858156028369e-07, + "loss": 0.856, "step": 39 }, { "epoch": 0.0, - "grad_norm": 4.808653298973119, - "learning_rate": 1.3986013986013987e-06, - "loss": 1.0705, + "grad_norm": 2.8480726168680865, + "learning_rate": 9.456264775413712e-07, + "loss": 0.7334, "step": 40 }, { "epoch": 0.0, - "grad_norm": 12.37717034792203, - "learning_rate": 1.4335664335664336e-06, - "loss": 1.0939, + "grad_norm": 3.104875322805582, + "learning_rate": 9.692671394799055e-07, + "loss": 0.7909, "step": 41 }, { "epoch": 0.0, - "grad_norm": 6.615327284739696, - "learning_rate": 1.4685314685314685e-06, - "loss": 1.0464, + "grad_norm": 2.4218619588018053, + "learning_rate": 9.929078014184399e-07, + "loss": 0.7058, "step": 42 }, { "epoch": 0.0, - "grad_norm": 4.179391150292978, - "learning_rate": 1.5034965034965034e-06, - "loss": 1.0878, + "grad_norm": 2.477914199444258, + "learning_rate": 1.016548463356974e-06, + "loss": 0.7549, "step": 43 }, { "epoch": 0.0, - "grad_norm": 20.006567323936093, - "learning_rate": 1.5384615384615387e-06, - "loss": 1.1121, + "grad_norm": 2.898055551946699, + "learning_rate": 1.0401891252955083e-06, + "loss": 0.8078, "step": 44 }, { "epoch": 0.0, - "grad_norm": 5.213316956884923, - "learning_rate": 1.5734265734265736e-06, - "loss": 1.0629, + "grad_norm": 3.7995673086732182, + "learning_rate": 1.0638297872340427e-06, + "loss": 0.8143, "step": 45 }, { "epoch": 0.0, - "grad_norm": 4.22220233163102, - "learning_rate": 1.6083916083916085e-06, - "loss": 1.1175, + "grad_norm": 2.3801635193808197, + "learning_rate": 1.087470449172577e-06, + "loss": 0.7865, "step": 46 }, { "epoch": 0.0, - "grad_norm": 3.945556190990595, - "learning_rate": 1.6433566433566434e-06, - "loss": 1.0423, + "grad_norm": 4.202452298258762, + "learning_rate": 1.111111111111111e-06, + "loss": 0.7604, "step": 47 }, { - "epoch": 0.01, - "grad_norm": 3.6470904938849054, - "learning_rate": 1.6783216783216785e-06, - "loss": 1.0511, + "epoch": 0.0, + "grad_norm": 2.3534765508525606, + "learning_rate": 1.1347517730496454e-06, + "loss": 0.7317, "step": 48 }, { - "epoch": 0.01, - "grad_norm": 4.446567502839809, - "learning_rate": 1.7132867132867134e-06, - "loss": 1.0117, + "epoch": 0.0, + "grad_norm": 2.441783143300323, + "learning_rate": 1.1583924349881798e-06, + "loss": 0.7005, "step": 49 }, { - "epoch": 0.01, - "grad_norm": 6.4133295800026815, - "learning_rate": 1.7482517482517483e-06, - "loss": 1.0825, + "epoch": 0.0, + "grad_norm": 2.855261585744242, + "learning_rate": 1.182033096926714e-06, + "loss": 0.8033, "step": 50 }, { - "epoch": 0.01, - "grad_norm": 9.263781826142708, - "learning_rate": 1.7832167832167834e-06, - "loss": 1.0928, + "epoch": 0.0, + "grad_norm": 2.848662778559429, + "learning_rate": 1.2056737588652482e-06, + "loss": 0.7861, "step": 51 }, { - "epoch": 0.01, - "grad_norm": 3.3410522050072218, - "learning_rate": 1.8181818181818183e-06, - "loss": 0.9511, + "epoch": 0.0, + "grad_norm": 2.702131872957413, + "learning_rate": 1.2293144208037826e-06, + "loss": 0.8627, "step": 52 }, { - "epoch": 0.01, - "grad_norm": 5.078099442521805, - "learning_rate": 1.8531468531468532e-06, - "loss": 1.0446, + "epoch": 0.0, + "grad_norm": 2.543996867152144, + "learning_rate": 1.2529550827423168e-06, + "loss": 0.6894, "step": 53 }, { - "epoch": 0.01, - "grad_norm": 3.7783601770563657, - "learning_rate": 1.888111888111888e-06, - "loss": 0.9961, + "epoch": 0.0, + "grad_norm": 2.2014595771896075, + "learning_rate": 1.276595744680851e-06, + "loss": 0.7669, "step": 54 }, { - "epoch": 0.01, - "grad_norm": 5.3245107534214275, - "learning_rate": 1.9230769230769234e-06, - "loss": 1.002, + "epoch": 0.0, + "grad_norm": 2.311136134810509, + "learning_rate": 1.3002364066193854e-06, + "loss": 0.8149, "step": 55 }, { - "epoch": 0.01, - "grad_norm": 4.550873551909127, - "learning_rate": 1.9580419580419583e-06, - "loss": 1.0045, + "epoch": 0.0, + "grad_norm": 2.342133445768906, + "learning_rate": 1.3238770685579196e-06, + "loss": 0.7447, "step": 56 }, { - "epoch": 0.01, - "grad_norm": 5.298300519654384, - "learning_rate": 1.993006993006993e-06, - "loss": 0.9899, + "epoch": 0.0, + "grad_norm": 2.3940363343042708, + "learning_rate": 1.347517730496454e-06, + "loss": 0.8097, "step": 57 }, { - "epoch": 0.01, - "grad_norm": 5.636808401697184, - "learning_rate": 2.027972027972028e-06, - "loss": 1.0293, + "epoch": 0.0, + "grad_norm": 2.756101926155324, + "learning_rate": 1.3711583924349884e-06, + "loss": 0.7584, "step": 58 }, { - "epoch": 0.01, - "grad_norm": 4.989845861785016, - "learning_rate": 2.0629370629370634e-06, - "loss": 0.9782, + "epoch": 0.0, + "grad_norm": 2.809278240572465, + "learning_rate": 1.3947990543735226e-06, + "loss": 0.7809, "step": 59 }, { - "epoch": 0.01, - "grad_norm": 4.036151597648198, - "learning_rate": 2.0979020979020983e-06, - "loss": 0.9476, + "epoch": 0.0, + "grad_norm": 2.6316174176009817, + "learning_rate": 1.418439716312057e-06, + "loss": 0.6602, "step": 60 }, { - "epoch": 0.01, - "grad_norm": 4.748491625435901, - "learning_rate": 2.132867132867133e-06, - "loss": 1.0607, + "epoch": 0.0, + "grad_norm": 2.3577560926340504, + "learning_rate": 1.4420803782505912e-06, + "loss": 0.7189, "step": 61 }, { - "epoch": 0.01, - "grad_norm": 4.981488543652494, - "learning_rate": 2.167832167832168e-06, - "loss": 1.0103, + "epoch": 0.0, + "grad_norm": 2.051199436921334, + "learning_rate": 1.4657210401891254e-06, + "loss": 0.6896, "step": 62 }, { - "epoch": 0.01, - "grad_norm": 2.747396934973974, - "learning_rate": 2.202797202797203e-06, - "loss": 0.9394, + "epoch": 0.0, + "grad_norm": 2.1557201165820703, + "learning_rate": 1.4893617021276596e-06, + "loss": 0.7027, "step": 63 }, { - "epoch": 0.01, - "grad_norm": 6.959028225064991, - "learning_rate": 2.237762237762238e-06, - "loss": 1.0223, + "epoch": 0.0, + "grad_norm": 2.19275007266616, + "learning_rate": 1.513002364066194e-06, + "loss": 0.7195, "step": 64 }, { - "epoch": 0.01, - "grad_norm": 4.1572731448374745, - "learning_rate": 2.2727272727272728e-06, - "loss": 0.8474, + "epoch": 0.0, + "grad_norm": 2.2503664757400452, + "learning_rate": 1.5366430260047282e-06, + "loss": 0.7333, "step": 65 }, { - "epoch": 0.01, - "grad_norm": 4.264763279942374, - "learning_rate": 2.307692307692308e-06, - "loss": 1.0108, + "epoch": 0.0, + "grad_norm": 1.9571027147559699, + "learning_rate": 1.5602836879432626e-06, + "loss": 0.6952, "step": 66 }, { - "epoch": 0.01, - "grad_norm": 6.095523926263317, - "learning_rate": 2.342657342657343e-06, - "loss": 0.9966, + "epoch": 0.0, + "grad_norm": 2.145083354315299, + "learning_rate": 1.583924349881797e-06, + "loss": 0.7175, "step": 67 }, { - "epoch": 0.01, - "grad_norm": 2.0378141611911293, - "learning_rate": 2.377622377622378e-06, - "loss": 0.6418, + "epoch": 0.0, + "grad_norm": 1.8462498834637493, + "learning_rate": 1.6075650118203312e-06, + "loss": 0.8097, "step": 68 }, { - "epoch": 0.01, - "grad_norm": 12.350034886280204, - "learning_rate": 2.4125874125874128e-06, - "loss": 0.9833, + "epoch": 0.0, + "grad_norm": 2.0321790558242383, + "learning_rate": 1.6312056737588656e-06, + "loss": 0.6543, "step": 69 }, { - "epoch": 0.01, - "grad_norm": 4.400350695457104, - "learning_rate": 2.4475524475524477e-06, - "loss": 1.014, + "epoch": 0.0, + "grad_norm": 2.12526700475727, + "learning_rate": 1.6548463356973996e-06, + "loss": 0.707, "step": 70 }, { "epoch": 0.01, - "grad_norm": 8.539091480211638, - "learning_rate": 2.4825174825174825e-06, - "loss": 0.9588, + "grad_norm": 2.309430782589029, + "learning_rate": 1.678486997635934e-06, + "loss": 0.6983, "step": 71 }, { "epoch": 0.01, - "grad_norm": 2.76197207482864, - "learning_rate": 2.517482517482518e-06, - "loss": 1.0505, + "grad_norm": 1.9668279965506479, + "learning_rate": 1.7021276595744682e-06, + "loss": 0.754, "step": 72 }, { "epoch": 0.01, - "grad_norm": 4.105444561403831, - "learning_rate": 2.5524475524475528e-06, - "loss": 1.0268, + "grad_norm": 0.7288353278623705, + "learning_rate": 1.7257683215130026e-06, + "loss": 0.4241, "step": 73 }, { "epoch": 0.01, - "grad_norm": 4.808311827182721, - "learning_rate": 2.5874125874125877e-06, - "loss": 0.9266, + "grad_norm": 2.697370867137166, + "learning_rate": 1.7494089834515368e-06, + "loss": 0.6779, "step": 74 }, { "epoch": 0.01, - "grad_norm": 3.3062924637115896, - "learning_rate": 2.6223776223776225e-06, - "loss": 0.9497, + "grad_norm": 2.07421670502202, + "learning_rate": 1.7730496453900712e-06, + "loss": 0.6492, "step": 75 }, { "epoch": 0.01, - "grad_norm": 5.359465673010584, - "learning_rate": 2.6573426573426574e-06, - "loss": 1.0061, + "grad_norm": 2.138915505095088, + "learning_rate": 1.7966903073286054e-06, + "loss": 0.7194, "step": 76 }, { "epoch": 0.01, - "grad_norm": 3.5894653414802122, - "learning_rate": 2.6923076923076923e-06, - "loss": 0.9365, + "grad_norm": 1.9771665655126607, + "learning_rate": 1.8203309692671398e-06, + "loss": 0.7739, "step": 77 }, { "epoch": 0.01, - "grad_norm": 5.6964431945565215, - "learning_rate": 2.7272727272727272e-06, - "loss": 0.9945, + "grad_norm": 3.391421759862296, + "learning_rate": 1.8439716312056737e-06, + "loss": 0.7211, "step": 78 }, { "epoch": 0.01, - "grad_norm": 3.554710415267673, - "learning_rate": 2.762237762237762e-06, - "loss": 0.909, + "grad_norm": 2.2353836099243853, + "learning_rate": 1.8676122931442081e-06, + "loss": 0.7786, "step": 79 }, { "epoch": 0.01, - "grad_norm": 3.6224966193912933, - "learning_rate": 2.7972027972027974e-06, - "loss": 0.9841, + "grad_norm": 1.9268430730450932, + "learning_rate": 1.8912529550827423e-06, + "loss": 0.6035, "step": 80 }, { "epoch": 0.01, - "grad_norm": 4.100484077566709, - "learning_rate": 2.8321678321678323e-06, - "loss": 0.9785, + "grad_norm": 2.1840594946106324, + "learning_rate": 1.9148936170212767e-06, + "loss": 0.6269, "step": 81 }, { "epoch": 0.01, - "grad_norm": 2.7434549809880306, - "learning_rate": 2.8671328671328672e-06, - "loss": 0.9091, + "grad_norm": 2.5120810844420296, + "learning_rate": 1.938534278959811e-06, + "loss": 0.7292, "step": 82 }, { "epoch": 0.01, - "grad_norm": 2.910473094338652, - "learning_rate": 2.902097902097902e-06, - "loss": 0.9128, + "grad_norm": 2.089928020757947, + "learning_rate": 1.9621749408983455e-06, + "loss": 0.6384, "step": 83 }, { "epoch": 0.01, - "grad_norm": 4.681590491267232, - "learning_rate": 2.937062937062937e-06, - "loss": 0.9723, + "grad_norm": 2.5159637071868013, + "learning_rate": 1.9858156028368797e-06, + "loss": 0.6891, "step": 84 }, { "epoch": 0.01, - "grad_norm": 3.8481519631092667, - "learning_rate": 2.972027972027972e-06, - "loss": 0.9126, + "grad_norm": 2.2092160263245137, + "learning_rate": 2.009456264775414e-06, + "loss": 0.728, "step": 85 }, { "epoch": 0.01, - "grad_norm": 3.061017336429524, - "learning_rate": 3.006993006993007e-06, - "loss": 0.949, + "grad_norm": 2.976966258807257, + "learning_rate": 2.033096926713948e-06, + "loss": 0.6447, "step": 86 }, { "epoch": 0.01, - "grad_norm": 3.1054284861421526, - "learning_rate": 3.0419580419580425e-06, - "loss": 0.954, + "grad_norm": 2.394943049290893, + "learning_rate": 2.0567375886524823e-06, + "loss": 0.6845, "step": 87 }, { "epoch": 0.01, - "grad_norm": 5.216222090801437, - "learning_rate": 3.0769230769230774e-06, - "loss": 0.999, + "grad_norm": 2.5464700605671347, + "learning_rate": 2.0803782505910165e-06, + "loss": 0.761, "step": 88 }, { "epoch": 0.01, - "grad_norm": 9.344055127943243, - "learning_rate": 3.1118881118881123e-06, - "loss": 0.8865, + "grad_norm": 2.85921586081652, + "learning_rate": 2.104018912529551e-06, + "loss": 0.7155, "step": 89 }, { "epoch": 0.01, - "grad_norm": 3.916337109234207, - "learning_rate": 3.1468531468531472e-06, - "loss": 0.9985, + "grad_norm": 2.119553769728487, + "learning_rate": 2.1276595744680853e-06, + "loss": 0.6555, "step": 90 }, { "epoch": 0.01, - "grad_norm": 5.646273915281262, - "learning_rate": 3.181818181818182e-06, - "loss": 0.9368, + "grad_norm": 2.0326435937348313, + "learning_rate": 2.1513002364066195e-06, + "loss": 0.6183, "step": 91 }, { "epoch": 0.01, - "grad_norm": 3.2528928171773566, - "learning_rate": 3.216783216783217e-06, - "loss": 0.8946, + "grad_norm": 2.185959844338953, + "learning_rate": 2.174940898345154e-06, + "loss": 0.6494, "step": 92 }, { "epoch": 0.01, - "grad_norm": 3.670080997940339, - "learning_rate": 3.251748251748252e-06, - "loss": 0.883, + "grad_norm": 2.0629475084059496, + "learning_rate": 2.1985815602836883e-06, + "loss": 0.6943, "step": 93 }, { "epoch": 0.01, - "grad_norm": 4.3498888254458885, - "learning_rate": 3.286713286713287e-06, - "loss": 0.8334, + "grad_norm": 2.1226083857699036, + "learning_rate": 2.222222222222222e-06, + "loss": 0.6199, "step": 94 }, { "epoch": 0.01, - "grad_norm": 3.4044098902580067, - "learning_rate": 3.321678321678322e-06, - "loss": 0.8982, + "grad_norm": 2.1156533869945293, + "learning_rate": 2.2458628841607567e-06, + "loss": 0.7093, "step": 95 }, { "epoch": 0.01, - "grad_norm": 5.964267535512436, - "learning_rate": 3.356643356643357e-06, - "loss": 0.901, + "grad_norm": 0.7452190102948758, + "learning_rate": 2.269503546099291e-06, + "loss": 0.395, "step": 96 }, { "epoch": 0.01, - "grad_norm": 7.2547749034420175, - "learning_rate": 3.391608391608392e-06, - "loss": 0.888, + "grad_norm": 2.0434874621946433, + "learning_rate": 2.293144208037825e-06, + "loss": 0.56, "step": 97 }, { "epoch": 0.01, - "grad_norm": 3.6445555952684594, - "learning_rate": 3.426573426573427e-06, - "loss": 0.9357, + "grad_norm": 0.7957635107147354, + "learning_rate": 2.3167848699763597e-06, + "loss": 0.4229, "step": 98 }, { "epoch": 0.01, - "grad_norm": 3.6183641170544676, - "learning_rate": 3.4615384615384617e-06, - "loss": 0.8898, + "grad_norm": 2.183195640197295, + "learning_rate": 2.340425531914894e-06, + "loss": 0.708, "step": 99 }, { "epoch": 0.01, - "grad_norm": 3.683704401531074, - "learning_rate": 3.4965034965034966e-06, - "loss": 0.9716, + "grad_norm": 2.149757424731977, + "learning_rate": 2.364066193853428e-06, + "loss": 0.6563, "step": 100 }, { "epoch": 0.01, - "grad_norm": 4.642503279279386, - "learning_rate": 3.5314685314685315e-06, - "loss": 0.8517, + "grad_norm": 2.192369099217086, + "learning_rate": 2.3877068557919627e-06, + "loss": 0.6403, "step": 101 }, { "epoch": 0.01, - "grad_norm": 3.5861147598949064, - "learning_rate": 3.566433566433567e-06, - "loss": 0.8891, + "grad_norm": 1.9478973010204859, + "learning_rate": 2.4113475177304965e-06, + "loss": 0.6792, "step": 102 }, { "epoch": 0.01, - "grad_norm": 6.15820179982336, - "learning_rate": 3.6013986013986017e-06, - "loss": 0.9273, + "grad_norm": 1.8758522721544448, + "learning_rate": 2.4349881796690306e-06, + "loss": 0.6538, "step": 103 }, { "epoch": 0.01, - "grad_norm": 3.0966116013351526, - "learning_rate": 3.6363636363636366e-06, - "loss": 0.8954, + "grad_norm": 0.7754239938958422, + "learning_rate": 2.4586288416075653e-06, + "loss": 0.3938, "step": 104 }, { "epoch": 0.01, - "grad_norm": 4.590992779608164, - "learning_rate": 3.6713286713286715e-06, - "loss": 0.8935, + "grad_norm": 2.029679982723797, + "learning_rate": 2.4822695035460995e-06, + "loss": 0.6356, "step": 105 }, { "epoch": 0.01, - "grad_norm": 6.257062049913251, - "learning_rate": 3.7062937062937064e-06, - "loss": 0.7903, + "grad_norm": 3.3219276832674427, + "learning_rate": 2.5059101654846336e-06, + "loss": 0.7016, "step": 106 }, { "epoch": 0.01, - "grad_norm": 3.1160354318045598, - "learning_rate": 3.7412587412587413e-06, - "loss": 0.9888, + "grad_norm": 2.321111196586404, + "learning_rate": 2.529550827423168e-06, + "loss": 0.641, "step": 107 }, { "epoch": 0.01, - "grad_norm": 3.557620314215135, - "learning_rate": 3.776223776223776e-06, - "loss": 0.8218, + "grad_norm": 2.8213641361459265, + "learning_rate": 2.553191489361702e-06, + "loss": 0.6369, "step": 108 }, { "epoch": 0.01, - "grad_norm": 3.690586014565839, - "learning_rate": 3.811188811188811e-06, - "loss": 0.9652, + "grad_norm": 2.066186222098806, + "learning_rate": 2.5768321513002366e-06, + "loss": 0.7249, "step": 109 }, { "epoch": 0.01, - "grad_norm": 4.649988211859253, - "learning_rate": 3.846153846153847e-06, - "loss": 0.874, + "grad_norm": 2.0075314634026085, + "learning_rate": 2.600472813238771e-06, + "loss": 0.6985, "step": 110 }, { "epoch": 0.01, - "grad_norm": 3.4358159594855593, - "learning_rate": 3.881118881118881e-06, - "loss": 0.8646, + "grad_norm": 2.009095582419765, + "learning_rate": 2.624113475177305e-06, + "loss": 0.651, "step": 111 }, { "epoch": 0.01, - "grad_norm": 4.275525174285379, - "learning_rate": 3.916083916083917e-06, - "loss": 0.8884, + "grad_norm": 1.8769117515328484, + "learning_rate": 2.6477541371158392e-06, + "loss": 0.6231, "step": 112 }, { "epoch": 0.01, - "grad_norm": 4.163330070858521, - "learning_rate": 3.951048951048951e-06, - "loss": 0.8856, + "grad_norm": 2.349295938224509, + "learning_rate": 2.671394799054374e-06, + "loss": 0.8078, "step": 113 }, { "epoch": 0.01, - "grad_norm": 2.0432246966048093, - "learning_rate": 3.986013986013986e-06, - "loss": 0.6153, + "grad_norm": 2.392344586533091, + "learning_rate": 2.695035460992908e-06, + "loss": 0.7138, "step": 114 }, { "epoch": 0.01, - "grad_norm": 3.6243328469121807, - "learning_rate": 4.020979020979021e-06, - "loss": 0.9266, + "grad_norm": 1.9772353553735873, + "learning_rate": 2.7186761229314422e-06, + "loss": 0.6069, "step": 115 }, { "epoch": 0.01, - "grad_norm": 3.7038828259874026, - "learning_rate": 4.055944055944056e-06, - "loss": 0.8613, + "grad_norm": 2.2605075851878067, + "learning_rate": 2.742316784869977e-06, + "loss": 0.6807, "step": 116 }, { "epoch": 0.01, - "grad_norm": 6.587444425327127, - "learning_rate": 4.0909090909090915e-06, - "loss": 0.8677, + "grad_norm": 2.0221386736618547, + "learning_rate": 2.765957446808511e-06, + "loss": 0.6383, "step": 117 }, { "epoch": 0.01, - "grad_norm": 2.8022237219414, - "learning_rate": 4.125874125874127e-06, - "loss": 0.9397, + "grad_norm": 2.8449815904598856, + "learning_rate": 2.7895981087470452e-06, + "loss": 0.6347, "step": 118 }, { "epoch": 0.01, - "grad_norm": 4.591322074020894, - "learning_rate": 4.160839160839161e-06, - "loss": 0.8731, + "grad_norm": 7.182608803469407, + "learning_rate": 2.8132387706855794e-06, + "loss": 0.6879, "step": 119 }, { "epoch": 0.01, - "grad_norm": 4.273093982425321, - "learning_rate": 4.195804195804197e-06, - "loss": 0.8526, + "grad_norm": 2.561970103209323, + "learning_rate": 2.836879432624114e-06, + "loss": 0.6197, "step": 120 }, { "epoch": 0.01, - "grad_norm": 4.197128473907411, - "learning_rate": 4.230769230769231e-06, - "loss": 0.8944, + "grad_norm": 2.659226172166337, + "learning_rate": 2.8605200945626482e-06, + "loss": 0.5801, "step": 121 }, { "epoch": 0.01, - "grad_norm": 3.223590603807003, - "learning_rate": 4.265734265734266e-06, - "loss": 0.8939, + "grad_norm": 0.8008052054278582, + "learning_rate": 2.8841607565011824e-06, + "loss": 0.4485, "step": 122 }, { "epoch": 0.01, - "grad_norm": 3.7474696753810433, - "learning_rate": 4.300699300699301e-06, - "loss": 0.893, + "grad_norm": 2.159645629054842, + "learning_rate": 2.907801418439716e-06, + "loss": 0.7098, "step": 123 }, { "epoch": 0.01, - "grad_norm": 7.509667764665236, - "learning_rate": 4.335664335664336e-06, - "loss": 0.8958, + "grad_norm": 2.4491117080711446, + "learning_rate": 2.931442080378251e-06, + "loss": 0.7218, "step": 124 }, { "epoch": 0.01, - "grad_norm": 3.4746005923931715, - "learning_rate": 4.3706293706293715e-06, - "loss": 0.9511, + "grad_norm": 2.5344555213359006, + "learning_rate": 2.955082742316785e-06, + "loss": 0.7083, "step": 125 }, { "epoch": 0.01, - "grad_norm": 3.8752998189935024, - "learning_rate": 4.405594405594406e-06, - "loss": 0.9831, + "grad_norm": 2.802208160376501, + "learning_rate": 2.978723404255319e-06, + "loss": 0.6903, "step": 126 }, { "epoch": 0.01, - "grad_norm": 2.6750347835184956, - "learning_rate": 4.440559440559441e-06, - "loss": 0.8389, + "grad_norm": 2.401066542997042, + "learning_rate": 3.0023640661938534e-06, + "loss": 0.5544, "step": 127 }, { "epoch": 0.01, - "grad_norm": 3.059225334055095, - "learning_rate": 4.475524475524476e-06, - "loss": 0.9191, + "grad_norm": 2.0520881776094186, + "learning_rate": 3.026004728132388e-06, + "loss": 0.5865, "step": 128 }, { "epoch": 0.01, - "grad_norm": 2.8492139079466123, - "learning_rate": 4.510489510489511e-06, - "loss": 0.9256, + "grad_norm": 2.02432126603985, + "learning_rate": 3.049645390070922e-06, + "loss": 0.6963, "step": 129 }, { "epoch": 0.01, - "grad_norm": 3.466866349881368, - "learning_rate": 4.5454545454545455e-06, - "loss": 0.9199, + "grad_norm": 2.121749012154649, + "learning_rate": 3.0732860520094564e-06, + "loss": 0.6213, "step": 130 }, { "epoch": 0.01, - "grad_norm": 2.5173267002299946, - "learning_rate": 4.580419580419581e-06, - "loss": 0.8514, + "grad_norm": 2.256329056344549, + "learning_rate": 3.096926713947991e-06, + "loss": 0.6434, "step": 131 }, { "epoch": 0.01, - "grad_norm": 3.6745126525258236, - "learning_rate": 4.615384615384616e-06, - "loss": 0.9873, + "grad_norm": 3.7872109464815242, + "learning_rate": 3.120567375886525e-06, + "loss": 0.6728, "step": 132 }, { "epoch": 0.01, - "grad_norm": 2.9338742337892234, - "learning_rate": 4.650349650349651e-06, - "loss": 0.8569, + "grad_norm": 2.006021966296559, + "learning_rate": 3.1442080378250594e-06, + "loss": 0.6654, "step": 133 }, { "epoch": 0.01, - "grad_norm": 9.843844549233959, - "learning_rate": 4.685314685314686e-06, - "loss": 0.9136, + "grad_norm": 2.917709437476842, + "learning_rate": 3.167848699763594e-06, + "loss": 0.5955, "step": 134 }, { "epoch": 0.01, - "grad_norm": 2.624661321804515, - "learning_rate": 4.72027972027972e-06, - "loss": 0.9501, + "grad_norm": 2.1049139535388797, + "learning_rate": 3.191489361702128e-06, + "loss": 0.6252, "step": 135 }, { "epoch": 0.01, - "grad_norm": 3.175344406567936, - "learning_rate": 4.755244755244756e-06, - "loss": 0.7571, + "grad_norm": 2.4296111270711354, + "learning_rate": 3.2151300236406624e-06, + "loss": 0.6514, "step": 136 }, { "epoch": 0.01, - "grad_norm": 2.9193507761169832, - "learning_rate": 4.79020979020979e-06, - "loss": 0.8804, + "grad_norm": 2.15678878363444, + "learning_rate": 3.2387706855791966e-06, + "loss": 0.6498, "step": 137 }, { "epoch": 0.01, - "grad_norm": 3.136679694610541, - "learning_rate": 4.8251748251748255e-06, - "loss": 0.8397, + "grad_norm": 2.619197676015908, + "learning_rate": 3.262411347517731e-06, + "loss": 0.6464, "step": 138 }, { "epoch": 0.01, - "grad_norm": 4.002722016275543, - "learning_rate": 4.86013986013986e-06, - "loss": 0.7947, + "grad_norm": 2.13284102430054, + "learning_rate": 3.286052009456265e-06, + "loss": 0.6549, "step": 139 }, { "epoch": 0.01, - "grad_norm": 4.059401974397323, - "learning_rate": 4.895104895104895e-06, - "loss": 0.9127, + "grad_norm": 2.6616228935224164, + "learning_rate": 3.309692671394799e-06, + "loss": 0.608, "step": 140 }, { "epoch": 0.01, - "grad_norm": 6.278913674680803, - "learning_rate": 4.930069930069931e-06, - "loss": 0.88, + "grad_norm": 2.1618771571085063, + "learning_rate": 3.3333333333333333e-06, + "loss": 0.6672, "step": 141 }, { "epoch": 0.01, - "grad_norm": 4.830877808380784, - "learning_rate": 4.965034965034965e-06, - "loss": 0.9, + "grad_norm": 2.169749292188095, + "learning_rate": 3.356973995271868e-06, + "loss": 0.6685, "step": 142 }, { - "epoch": 0.02, - "grad_norm": 3.983823394564358, - "learning_rate": 5e-06, - "loss": 0.9034, + "epoch": 0.01, + "grad_norm": 1.8560454137735654, + "learning_rate": 3.380614657210402e-06, + "loss": 0.6295, "step": 143 }, { - "epoch": 0.02, - "grad_norm": 5.804644998821451, - "learning_rate": 5.034965034965036e-06, - "loss": 0.7911, + "epoch": 0.01, + "grad_norm": 2.768424806113618, + "learning_rate": 3.4042553191489363e-06, + "loss": 0.6793, "step": 144 }, { - "epoch": 0.02, - "grad_norm": 4.991083677118832, - "learning_rate": 5.06993006993007e-06, - "loss": 0.9021, + "epoch": 0.01, + "grad_norm": 2.1661559577935954, + "learning_rate": 3.4278959810874705e-06, + "loss": 0.6656, "step": 145 }, { - "epoch": 0.02, - "grad_norm": 5.151416376092996, - "learning_rate": 5.1048951048951055e-06, - "loss": 0.9136, + "epoch": 0.01, + "grad_norm": 2.3715922653346153, + "learning_rate": 3.451536643026005e-06, + "loss": 0.6461, "step": 146 }, { - "epoch": 0.02, - "grad_norm": 3.2803307360013227, - "learning_rate": 5.13986013986014e-06, - "loss": 0.862, + "epoch": 0.01, + "grad_norm": 3.948531044351373, + "learning_rate": 3.4751773049645393e-06, + "loss": 0.697, "step": 147 }, { - "epoch": 0.02, - "grad_norm": 3.2255731360831343, - "learning_rate": 5.174825174825175e-06, - "loss": 0.8045, + "epoch": 0.01, + "grad_norm": 2.2048575774614254, + "learning_rate": 3.4988179669030735e-06, + "loss": 0.6424, "step": 148 }, { - "epoch": 0.02, - "grad_norm": 10.688893991857855, - "learning_rate": 5.20979020979021e-06, - "loss": 0.8606, + "epoch": 0.01, + "grad_norm": 2.6250354018953552, + "learning_rate": 3.522458628841608e-06, + "loss": 0.6753, "step": 149 }, { - "epoch": 0.02, - "grad_norm": 3.0166064964847963, - "learning_rate": 5.244755244755245e-06, - "loss": 0.8728, + "epoch": 0.01, + "grad_norm": 1.9528489349432132, + "learning_rate": 3.5460992907801423e-06, + "loss": 0.5661, "step": 150 }, { - "epoch": 0.02, - "grad_norm": 5.697806716640856, - "learning_rate": 5.27972027972028e-06, - "loss": 0.8277, + "epoch": 0.01, + "grad_norm": 6.686830835718987, + "learning_rate": 3.5697399527186765e-06, + "loss": 0.5791, "step": 151 }, { - "epoch": 0.02, - "grad_norm": 3.9797528895264196, - "learning_rate": 5.314685314685315e-06, - "loss": 0.8832, + "epoch": 0.01, + "grad_norm": 2.1607801468413625, + "learning_rate": 3.5933806146572107e-06, + "loss": 0.6341, "step": 152 }, { - "epoch": 0.02, - "grad_norm": 3.5741820214637494, - "learning_rate": 5.34965034965035e-06, - "loss": 0.7884, + "epoch": 0.01, + "grad_norm": 1.9659459506612003, + "learning_rate": 3.6170212765957453e-06, + "loss": 0.6487, "step": 153 }, { - "epoch": 0.02, - "grad_norm": 3.904019428236043, - "learning_rate": 5.384615384615385e-06, - "loss": 0.8818, + "epoch": 0.01, + "grad_norm": 1.9634120209876291, + "learning_rate": 3.6406619385342795e-06, + "loss": 0.712, "step": 154 }, { - "epoch": 0.02, - "grad_norm": 5.113543577877246, - "learning_rate": 5.41958041958042e-06, - "loss": 0.8515, + "epoch": 0.01, + "grad_norm": 2.3123339067049553, + "learning_rate": 3.6643026004728133e-06, + "loss": 0.6519, "step": 155 }, { - "epoch": 0.02, - "grad_norm": 3.916916159534253, - "learning_rate": 5.4545454545454545e-06, - "loss": 0.883, + "epoch": 0.01, + "grad_norm": 2.036683460177132, + "learning_rate": 3.6879432624113475e-06, + "loss": 0.6476, "step": 156 }, { - "epoch": 0.02, - "grad_norm": 3.026332966896275, - "learning_rate": 5.48951048951049e-06, - "loss": 0.8202, + "epoch": 0.01, + "grad_norm": 2.275478462736982, + "learning_rate": 3.711583924349882e-06, + "loss": 0.6598, "step": 157 }, { - "epoch": 0.02, - "grad_norm": 4.2747535576313895, - "learning_rate": 5.524475524475524e-06, - "loss": 0.8201, + "epoch": 0.01, + "grad_norm": 2.0982463942842586, + "learning_rate": 3.7352245862884163e-06, + "loss": 0.6339, "step": 158 }, { - "epoch": 0.02, - "grad_norm": 4.915673342738748, - "learning_rate": 5.5594405594405596e-06, - "loss": 0.8054, + "epoch": 0.01, + "grad_norm": 2.0645188410665365, + "learning_rate": 3.7588652482269505e-06, + "loss": 0.6329, "step": 159 }, { - "epoch": 0.02, - "grad_norm": 1.8868567094503492, - "learning_rate": 5.594405594405595e-06, - "loss": 0.5897, + "epoch": 0.01, + "grad_norm": 0.7775022337559055, + "learning_rate": 3.7825059101654847e-06, + "loss": 0.4324, "step": 160 }, { - "epoch": 0.02, - "grad_norm": 4.031567311252644, - "learning_rate": 5.629370629370629e-06, - "loss": 0.9305, + "epoch": 0.01, + "grad_norm": 2.148020865745057, + "learning_rate": 3.8061465721040193e-06, + "loss": 0.6783, "step": 161 }, { - "epoch": 0.02, - "grad_norm": 5.750704520517035, - "learning_rate": 5.664335664335665e-06, - "loss": 0.8506, + "epoch": 0.01, + "grad_norm": 2.4111658323753002, + "learning_rate": 3.8297872340425535e-06, + "loss": 0.7128, "step": 162 }, { - "epoch": 0.02, - "grad_norm": 3.9833189282740107, - "learning_rate": 5.699300699300699e-06, - "loss": 0.7708, + "epoch": 0.01, + "grad_norm": 2.3040776568311907, + "learning_rate": 3.853427895981088e-06, + "loss": 0.6391, "step": 163 }, { - "epoch": 0.02, - "grad_norm": 8.354822607813489, - "learning_rate": 5.7342657342657345e-06, - "loss": 0.8819, + "epoch": 0.01, + "grad_norm": 2.248369138677927, + "learning_rate": 3.877068557919622e-06, + "loss": 0.6806, "step": 164 }, { - "epoch": 0.02, - "grad_norm": 3.130090251045954, - "learning_rate": 5.769230769230769e-06, - "loss": 0.769, + "epoch": 0.01, + "grad_norm": 2.832519970664543, + "learning_rate": 3.9007092198581565e-06, + "loss": 0.4929, "step": 165 }, { - "epoch": 0.02, - "grad_norm": 5.0268037848757725, - "learning_rate": 5.804195804195804e-06, - "loss": 0.8636, + "epoch": 0.01, + "grad_norm": 1.9986252116661374, + "learning_rate": 3.924349881796691e-06, + "loss": 0.6269, "step": 166 }, { - "epoch": 0.02, - "grad_norm": 4.590445411100409, - "learning_rate": 5.83916083916084e-06, - "loss": 0.9014, + "epoch": 0.01, + "grad_norm": 2.2030260568042612, + "learning_rate": 3.947990543735225e-06, + "loss": 0.6258, "step": 167 }, { - "epoch": 0.02, - "grad_norm": 3.9573676244626235, - "learning_rate": 5.874125874125874e-06, - "loss": 0.9767, + "epoch": 0.01, + "grad_norm": 2.1303847063562737, + "learning_rate": 3.9716312056737595e-06, + "loss": 0.6312, "step": 168 }, { - "epoch": 0.02, - "grad_norm": 3.7976970282517706, - "learning_rate": 5.90909090909091e-06, - "loss": 0.7677, + "epoch": 0.01, + "grad_norm": 0.8692761665553068, + "learning_rate": 3.995271867612294e-06, + "loss": 0.3962, "step": 169 }, { - "epoch": 0.02, - "grad_norm": 3.20200552669586, - "learning_rate": 5.944055944055944e-06, - "loss": 0.8744, + "epoch": 0.01, + "grad_norm": 3.4747041382838306, + "learning_rate": 4.018912529550828e-06, + "loss": 0.605, "step": 170 }, { - "epoch": 0.02, - "grad_norm": 5.403103369719815, - "learning_rate": 5.97902097902098e-06, - "loss": 0.7684, + "epoch": 0.01, + "grad_norm": 2.000020680430264, + "learning_rate": 4.042553191489362e-06, + "loss": 0.6253, "step": 171 }, { - "epoch": 0.02, - "grad_norm": 5.688672125161367, - "learning_rate": 6.013986013986014e-06, - "loss": 0.828, + "epoch": 0.01, + "grad_norm": 1.9561153891124494, + "learning_rate": 4.066193853427896e-06, + "loss": 0.5603, "step": 172 }, { - "epoch": 0.02, - "grad_norm": 3.300203165530322, - "learning_rate": 6.04895104895105e-06, - "loss": 0.8845, + "epoch": 0.01, + "grad_norm": 2.231992534936014, + "learning_rate": 4.08983451536643e-06, + "loss": 0.6216, "step": 173 }, { - "epoch": 0.02, - "grad_norm": 2.9549829165853074, - "learning_rate": 6.083916083916085e-06, - "loss": 0.8396, + "epoch": 0.01, + "grad_norm": 1.8735804858344745, + "learning_rate": 4.113475177304965e-06, + "loss": 0.5307, "step": 174 }, { - "epoch": 0.02, - "grad_norm": 3.137866556517222, - "learning_rate": 6.1188811188811196e-06, - "loss": 0.8187, + "epoch": 0.01, + "grad_norm": 2.2950329634408484, + "learning_rate": 4.137115839243499e-06, + "loss": 0.6554, "step": 175 }, { - "epoch": 0.02, - "grad_norm": 4.821237941276991, - "learning_rate": 6.153846153846155e-06, - "loss": 0.8752, + "epoch": 0.01, + "grad_norm": 1.7955483583206069, + "learning_rate": 4.160756501182033e-06, + "loss": 0.5008, "step": 176 }, { - "epoch": 0.02, - "grad_norm": 3.1343880638693324, - "learning_rate": 6.188811188811189e-06, - "loss": 0.8322, + "epoch": 0.01, + "grad_norm": 2.065040736707046, + "learning_rate": 4.184397163120568e-06, + "loss": 0.6066, "step": 177 }, { - "epoch": 0.02, - "grad_norm": 3.187172327993686, - "learning_rate": 6.223776223776225e-06, - "loss": 0.7712, + "epoch": 0.01, + "grad_norm": 0.9176646303520286, + "learning_rate": 4.208037825059102e-06, + "loss": 0.438, "step": 178 }, { - "epoch": 0.02, - "grad_norm": 3.822050694898768, - "learning_rate": 6.258741258741259e-06, - "loss": 0.8855, + "epoch": 0.01, + "grad_norm": 2.423573513431105, + "learning_rate": 4.231678486997636e-06, + "loss": 0.6705, "step": 179 }, { - "epoch": 0.02, - "grad_norm": 4.567034848940679, - "learning_rate": 6.2937062937062944e-06, - "loss": 0.8808, + "epoch": 0.01, + "grad_norm": 3.912752947175479, + "learning_rate": 4.255319148936171e-06, + "loss": 0.6361, "step": 180 }, { - "epoch": 0.02, - "grad_norm": 3.878919913487739, - "learning_rate": 6.32867132867133e-06, - "loss": 0.816, + "epoch": 0.01, + "grad_norm": 2.142787170184617, + "learning_rate": 4.278959810874705e-06, + "loss": 0.5986, "step": 181 }, { - "epoch": 0.02, - "grad_norm": 3.235297169720441, - "learning_rate": 6.363636363636364e-06, - "loss": 0.7188, + "epoch": 0.01, + "grad_norm": 2.1243054517956446, + "learning_rate": 4.302600472813239e-06, + "loss": 0.6819, "step": 182 }, { - "epoch": 0.02, - "grad_norm": 3.930819892851912, - "learning_rate": 6.3986013986013996e-06, - "loss": 0.9284, + "epoch": 0.01, + "grad_norm": 6.0177085628374165, + "learning_rate": 4.326241134751774e-06, + "loss": 0.5869, "step": 183 }, { - "epoch": 0.02, - "grad_norm": 3.670121712591447, - "learning_rate": 6.433566433566434e-06, - "loss": 0.7796, + "epoch": 0.01, + "grad_norm": 2.4128712648456, + "learning_rate": 4.349881796690308e-06, + "loss": 0.6538, "step": 184 }, { - "epoch": 0.02, - "grad_norm": 2.6886770215888207, - "learning_rate": 6.468531468531469e-06, - "loss": 0.8803, + "epoch": 0.01, + "grad_norm": 2.6524776635188654, + "learning_rate": 4.373522458628842e-06, + "loss": 0.6518, "step": 185 }, { - "epoch": 0.02, - "grad_norm": 3.5148986203471853, - "learning_rate": 6.503496503496504e-06, - "loss": 0.8603, + "epoch": 0.01, + "grad_norm": 2.2102545510210057, + "learning_rate": 4.397163120567377e-06, + "loss": 0.5818, "step": 186 }, { - "epoch": 0.02, - "grad_norm": 8.671074065374444, - "learning_rate": 6.538461538461539e-06, - "loss": 0.8593, + "epoch": 0.01, + "grad_norm": 2.8059648964021413, + "learning_rate": 4.42080378250591e-06, + "loss": 0.5986, "step": 187 }, { - "epoch": 0.02, - "grad_norm": 4.011761895974061, - "learning_rate": 6.573426573426574e-06, - "loss": 0.8902, + "epoch": 0.01, + "grad_norm": 2.029891636654725, + "learning_rate": 4.444444444444444e-06, + "loss": 0.6285, "step": 188 }, { - "epoch": 0.02, - "grad_norm": 2.48637995228731, - "learning_rate": 6.608391608391609e-06, - "loss": 0.7607, + "epoch": 0.01, + "grad_norm": 1.7976156623824606, + "learning_rate": 4.468085106382979e-06, + "loss": 0.6518, "step": 189 }, { - "epoch": 0.02, - "grad_norm": 3.3132599184181504, - "learning_rate": 6.643356643356644e-06, - "loss": 0.8014, + "epoch": 0.01, + "grad_norm": 1.8791569032450737, + "learning_rate": 4.491725768321513e-06, + "loss": 0.6251, "step": 190 }, { - "epoch": 0.02, - "grad_norm": 3.055806698354549, - "learning_rate": 6.678321678321679e-06, - "loss": 0.9812, + "epoch": 0.01, + "grad_norm": 2.604925920135495, + "learning_rate": 4.515366430260047e-06, + "loss": 0.7178, "step": 191 }, { - "epoch": 0.02, - "grad_norm": 3.2543072248631977, - "learning_rate": 6.713286713286714e-06, - "loss": 0.8687, + "epoch": 0.01, + "grad_norm": 2.479708832685261, + "learning_rate": 4.539007092198582e-06, + "loss": 0.6542, "step": 192 }, { - "epoch": 0.02, - "grad_norm": 3.040625426096151, - "learning_rate": 6.7482517482517485e-06, - "loss": 0.7797, + "epoch": 0.01, + "grad_norm": 4.2313100639606365, + "learning_rate": 4.562647754137116e-06, + "loss": 0.6172, "step": 193 }, { - "epoch": 0.02, - "grad_norm": 3.215620104060743, - "learning_rate": 6.783216783216784e-06, - "loss": 0.8274, + "epoch": 0.01, + "grad_norm": 2.3372270218352504, + "learning_rate": 4.58628841607565e-06, + "loss": 0.5594, "step": 194 }, { - "epoch": 0.02, - "grad_norm": 2.471180655628137, - "learning_rate": 6.818181818181818e-06, - "loss": 0.6187, + "epoch": 0.01, + "grad_norm": 0.9384828561671921, + "learning_rate": 4.609929078014185e-06, + "loss": 0.4505, "step": 195 }, { - "epoch": 0.02, - "grad_norm": 4.359167344334515, - "learning_rate": 6.853146853146854e-06, - "loss": 0.8114, + "epoch": 0.01, + "grad_norm": 2.6589284000373308, + "learning_rate": 4.633569739952719e-06, + "loss": 0.5785, "step": 196 }, { - "epoch": 0.02, - "grad_norm": 4.290199760313723, - "learning_rate": 6.888111888111889e-06, - "loss": 0.8188, + "epoch": 0.01, + "grad_norm": 2.670232525491877, + "learning_rate": 4.657210401891253e-06, + "loss": 0.6324, "step": 197 }, { - "epoch": 0.02, - "grad_norm": 5.885960883267336, - "learning_rate": 6.923076923076923e-06, - "loss": 0.8622, + "epoch": 0.01, + "grad_norm": 6.875992556481169, + "learning_rate": 4.680851063829788e-06, + "loss": 0.698, "step": 198 }, { - "epoch": 0.02, - "grad_norm": 2.63144492234704, - "learning_rate": 6.958041958041959e-06, - "loss": 0.9063, + "epoch": 0.01, + "grad_norm": 2.305184666203248, + "learning_rate": 4.704491725768322e-06, + "loss": 0.6553, "step": 199 }, { - "epoch": 0.02, - "grad_norm": 3.3612366109316167, - "learning_rate": 6.993006993006993e-06, - "loss": 0.862, + "epoch": 0.01, + "grad_norm": 2.254527590111259, + "learning_rate": 4.728132387706856e-06, + "loss": 0.6835, "step": 200 }, { - "epoch": 0.02, - "grad_norm": 4.586306665535676, - "learning_rate": 7.0279720279720285e-06, - "loss": 0.8577, + "epoch": 0.01, + "grad_norm": 2.159689917649488, + "learning_rate": 4.751773049645391e-06, + "loss": 0.617, "step": 201 }, { - "epoch": 0.02, - "grad_norm": 3.2176272960473553, - "learning_rate": 7.062937062937063e-06, - "loss": 0.8225, + "epoch": 0.01, + "grad_norm": 2.0960135032150142, + "learning_rate": 4.775413711583925e-06, + "loss": 0.6538, "step": 202 }, { - "epoch": 0.02, - "grad_norm": 3.416044727353836, - "learning_rate": 7.097902097902098e-06, - "loss": 0.8681, + "epoch": 0.01, + "grad_norm": 1.0146064500883962, + "learning_rate": 4.799054373522459e-06, + "loss": 0.4311, "step": 203 }, { - "epoch": 0.02, - "grad_norm": 5.4811630787160945, - "learning_rate": 7.132867132867134e-06, - "loss": 0.8767, + "epoch": 0.01, + "grad_norm": 1.8639200163765812, + "learning_rate": 4.822695035460993e-06, + "loss": 0.5789, "step": 204 }, { - "epoch": 0.02, - "grad_norm": 3.567725865125878, - "learning_rate": 7.167832167832168e-06, - "loss": 0.8528, + "epoch": 0.01, + "grad_norm": 2.352322437698567, + "learning_rate": 4.8463356973995275e-06, + "loss": 0.584, "step": 205 }, { - "epoch": 0.02, - "grad_norm": 3.2885446735714443, - "learning_rate": 7.202797202797203e-06, - "loss": 0.756, + "epoch": 0.01, + "grad_norm": 2.0169475662646694, + "learning_rate": 4.869976359338061e-06, + "loss": 0.5818, "step": 206 }, { - "epoch": 0.02, - "grad_norm": 4.884812749781261, - "learning_rate": 7.237762237762238e-06, - "loss": 0.8742, + "epoch": 0.01, + "grad_norm": 2.042454197711281, + "learning_rate": 4.893617021276596e-06, + "loss": 0.5529, "step": 207 }, { - "epoch": 0.02, - "grad_norm": 3.2827496038865385, - "learning_rate": 7.272727272727273e-06, - "loss": 0.854, + "epoch": 0.01, + "grad_norm": 3.0202756196873866, + "learning_rate": 4.9172576832151305e-06, + "loss": 0.63, "step": 208 }, { - "epoch": 0.02, - "grad_norm": 3.8769620411554135, - "learning_rate": 7.307692307692308e-06, - "loss": 0.9437, + "epoch": 0.01, + "grad_norm": 2.2172392101445784, + "learning_rate": 4.940898345153664e-06, + "loss": 0.6311, "step": 209 }, { - "epoch": 0.02, - "grad_norm": 3.152946273455516, - "learning_rate": 7.342657342657343e-06, - "loss": 0.7662, + "epoch": 0.01, + "grad_norm": 2.431770103937424, + "learning_rate": 4.964539007092199e-06, + "loss": 0.741, "step": 210 }, { - "epoch": 0.02, - "grad_norm": 2.7643870516857074, - "learning_rate": 7.377622377622379e-06, - "loss": 0.9363, + "epoch": 0.01, + "grad_norm": 2.0743363874139136, + "learning_rate": 4.9881796690307335e-06, + "loss": 0.6057, "step": 211 }, { "epoch": 0.02, - "grad_norm": 3.519504752467641, - "learning_rate": 7.412587412587413e-06, - "loss": 0.8056, + "grad_norm": 2.1884757140997397, + "learning_rate": 5.011820330969267e-06, + "loss": 0.6588, "step": 212 }, { "epoch": 0.02, - "grad_norm": 1.7919893275590533, - "learning_rate": 7.447552447552449e-06, - "loss": 0.6353, + "grad_norm": 2.040171022578918, + "learning_rate": 5.035460992907801e-06, + "loss": 0.6113, "step": 213 }, { "epoch": 0.02, - "grad_norm": 2.9052671409624167, - "learning_rate": 7.4825174825174825e-06, - "loss": 0.8335, + "grad_norm": 2.1314100402836016, + "learning_rate": 5.059101654846336e-06, + "loss": 0.6767, "step": 214 }, { "epoch": 0.02, - "grad_norm": 3.479591408911271, - "learning_rate": 7.517482517482519e-06, - "loss": 0.8493, + "grad_norm": 2.0336404153087533, + "learning_rate": 5.08274231678487e-06, + "loss": 0.6434, "step": 215 }, { "epoch": 0.02, - "grad_norm": 4.377375722973853, - "learning_rate": 7.552447552447552e-06, - "loss": 0.7782, + "grad_norm": 2.3218487362109674, + "learning_rate": 5.106382978723404e-06, + "loss": 0.594, "step": 216 }, { "epoch": 0.02, - "grad_norm": 3.2542755743953102, - "learning_rate": 7.5874125874125885e-06, - "loss": 0.8053, + "grad_norm": 2.1145117259166986, + "learning_rate": 5.130023640661939e-06, + "loss": 0.6236, "step": 217 }, { "epoch": 0.02, - "grad_norm": 3.2686387521722158, - "learning_rate": 7.622377622377622e-06, - "loss": 0.8326, + "grad_norm": 1.8190050333578367, + "learning_rate": 5.153664302600473e-06, + "loss": 0.5533, "step": 218 }, { "epoch": 0.02, - "grad_norm": 8.023160327593423, - "learning_rate": 7.657342657342658e-06, - "loss": 0.8383, + "grad_norm": 2.0695476922562404, + "learning_rate": 5.177304964539007e-06, + "loss": 0.5921, "step": 219 }, { "epoch": 0.02, - "grad_norm": 3.0319106359639614, - "learning_rate": 7.692307692307694e-06, - "loss": 0.873, + "grad_norm": 4.022709173960421, + "learning_rate": 5.200945626477542e-06, + "loss": 0.6494, "step": 220 }, { "epoch": 0.02, - "grad_norm": 3.857091847979987, - "learning_rate": 7.727272727272727e-06, - "loss": 0.8656, + "grad_norm": 2.0750349216859765, + "learning_rate": 5.2245862884160754e-06, + "loss": 0.5544, "step": 221 }, { "epoch": 0.02, - "grad_norm": 3.137600063788741, - "learning_rate": 7.762237762237763e-06, - "loss": 0.7638, + "grad_norm": 2.048908153034241, + "learning_rate": 5.24822695035461e-06, + "loss": 0.6785, "step": 222 }, { "epoch": 0.02, - "grad_norm": 4.911586562313934, - "learning_rate": 7.797202797202798e-06, - "loss": 0.7693, + "grad_norm": 1.926955254633487, + "learning_rate": 5.271867612293145e-06, + "loss": 0.6023, "step": 223 }, { "epoch": 0.02, - "grad_norm": 2.9977640714571807, - "learning_rate": 7.832167832167833e-06, - "loss": 0.8815, + "grad_norm": 0.8322456725055736, + "learning_rate": 5.2955082742316784e-06, + "loss": 0.433, "step": 224 }, { "epoch": 0.02, - "grad_norm": 2.6947791098337848, - "learning_rate": 7.867132867132867e-06, - "loss": 0.8508, + "grad_norm": 0.9526387888660328, + "learning_rate": 5.319148936170213e-06, + "loss": 0.4184, "step": 225 }, { "epoch": 0.02, - "grad_norm": 2.70865229471086, - "learning_rate": 7.902097902097902e-06, - "loss": 0.8039, + "grad_norm": 2.279764837851014, + "learning_rate": 5.342789598108748e-06, + "loss": 0.6319, "step": 226 }, { "epoch": 0.02, - "grad_norm": 3.9607344558352198, - "learning_rate": 7.937062937062937e-06, - "loss": 0.8913, + "grad_norm": 1.9231079260218826, + "learning_rate": 5.3664302600472814e-06, + "loss": 0.7165, "step": 227 }, { "epoch": 0.02, - "grad_norm": 2.7336519000248947, - "learning_rate": 7.972027972027973e-06, - "loss": 0.855, + "grad_norm": 2.5359330229169426, + "learning_rate": 5.390070921985816e-06, + "loss": 0.6826, "step": 228 }, { "epoch": 0.02, - "grad_norm": 3.197027691020523, - "learning_rate": 8.006993006993008e-06, - "loss": 0.8883, + "grad_norm": 2.3626667464121374, + "learning_rate": 5.413711583924351e-06, + "loss": 0.6322, "step": 229 }, { "epoch": 0.02, - "grad_norm": 2.8607529888322873, - "learning_rate": 8.041958041958042e-06, - "loss": 0.8003, + "grad_norm": 2.152996996023829, + "learning_rate": 5.4373522458628844e-06, + "loss": 0.616, "step": 230 }, { "epoch": 0.02, - "grad_norm": 6.811064736453866, - "learning_rate": 8.076923076923077e-06, - "loss": 0.9102, + "grad_norm": 1.7480873509891672, + "learning_rate": 5.460992907801419e-06, + "loss": 0.5359, "step": 231 }, { "epoch": 0.02, - "grad_norm": 1.9400768388006893, - "learning_rate": 8.111888111888112e-06, - "loss": 0.6545, + "grad_norm": 1.92330487822303, + "learning_rate": 5.484633569739954e-06, + "loss": 0.6141, "step": 232 }, { "epoch": 0.02, - "grad_norm": 3.1601907658724624, - "learning_rate": 8.146853146853148e-06, - "loss": 0.8757, + "grad_norm": 2.4040968360728185, + "learning_rate": 5.5082742316784874e-06, + "loss": 0.562, "step": 233 }, { "epoch": 0.02, - "grad_norm": 10.646638408245371, - "learning_rate": 8.181818181818183e-06, - "loss": 0.8677, + "grad_norm": 2.116920489934228, + "learning_rate": 5.531914893617022e-06, + "loss": 0.6639, "step": 234 }, { "epoch": 0.02, - "grad_norm": 5.665181271364294, - "learning_rate": 8.216783216783217e-06, - "loss": 0.7499, + "grad_norm": 2.222744562531556, + "learning_rate": 5.555555555555557e-06, + "loss": 0.6301, "step": 235 }, { "epoch": 0.02, - "grad_norm": 4.519763391278135, - "learning_rate": 8.251748251748254e-06, - "loss": 0.8092, + "grad_norm": 1.7979351889416966, + "learning_rate": 5.5791962174940904e-06, + "loss": 0.5751, "step": 236 }, { "epoch": 0.02, - "grad_norm": 4.799514568767844, - "learning_rate": 8.286713286713287e-06, - "loss": 0.8237, + "grad_norm": 5.173884141866729, + "learning_rate": 5.602836879432625e-06, + "loss": 0.6598, "step": 237 }, { - "epoch": 0.03, - "grad_norm": 4.3167393880149625, - "learning_rate": 8.321678321678323e-06, - "loss": 0.7958, + "epoch": 0.02, + "grad_norm": 2.4420450784075487, + "learning_rate": 5.626477541371159e-06, + "loss": 0.6309, "step": 238 }, { - "epoch": 0.03, - "grad_norm": 4.207750765393011, - "learning_rate": 8.356643356643356e-06, - "loss": 0.9301, + "epoch": 0.02, + "grad_norm": 2.425918388030342, + "learning_rate": 5.6501182033096934e-06, + "loss": 0.6037, "step": 239 }, { - "epoch": 0.03, - "grad_norm": 3.0242485006730004, - "learning_rate": 8.391608391608393e-06, - "loss": 0.8689, + "epoch": 0.02, + "grad_norm": 1.7341447896216793, + "learning_rate": 5.673758865248228e-06, + "loss": 0.6353, "step": 240 }, { - "epoch": 0.03, - "grad_norm": 4.061479269990459, - "learning_rate": 8.426573426573428e-06, - "loss": 0.8528, + "epoch": 0.02, + "grad_norm": 2.3018966088550648, + "learning_rate": 5.697399527186762e-06, + "loss": 0.64, "step": 241 }, { - "epoch": 0.03, - "grad_norm": 3.1543956915127973, - "learning_rate": 8.461538461538462e-06, - "loss": 0.8714, + "epoch": 0.02, + "grad_norm": 1.3374635210524473, + "learning_rate": 5.7210401891252964e-06, + "loss": 0.4235, "step": 242 }, { - "epoch": 0.03, - "grad_norm": 5.237885935376305, - "learning_rate": 8.496503496503497e-06, - "loss": 0.7606, + "epoch": 0.02, + "grad_norm": 2.282575378237286, + "learning_rate": 5.744680851063831e-06, + "loss": 0.5711, "step": 243 }, { - "epoch": 0.03, - "grad_norm": 1.950280179321886, - "learning_rate": 8.531468531468533e-06, - "loss": 0.6335, + "epoch": 0.02, + "grad_norm": 2.241359881772414, + "learning_rate": 5.768321513002365e-06, + "loss": 0.7367, "step": 244 }, { - "epoch": 0.03, - "grad_norm": 1.5896370335044245, - "learning_rate": 8.566433566433568e-06, - "loss": 0.6082, + "epoch": 0.02, + "grad_norm": 0.9589976522347291, + "learning_rate": 5.791962174940899e-06, + "loss": 0.4299, "step": 245 }, { - "epoch": 0.03, - "grad_norm": 11.404835463536644, - "learning_rate": 8.601398601398602e-06, - "loss": 0.8319, + "epoch": 0.02, + "grad_norm": 1.744917319264682, + "learning_rate": 5.815602836879432e-06, + "loss": 0.622, "step": 246 }, { - "epoch": 0.03, - "grad_norm": 3.238800843683949, - "learning_rate": 8.636363636363637e-06, - "loss": 0.7277, + "epoch": 0.02, + "grad_norm": 1.8207386108766157, + "learning_rate": 5.839243498817967e-06, + "loss": 0.5983, "step": 247 }, { - "epoch": 0.03, - "grad_norm": 4.816682891374414, - "learning_rate": 8.671328671328672e-06, - "loss": 0.8987, + "epoch": 0.02, + "grad_norm": 1.8499861172988363, + "learning_rate": 5.862884160756502e-06, + "loss": 0.612, "step": 248 }, { - "epoch": 0.03, - "grad_norm": 2.937171146872307, - "learning_rate": 8.706293706293708e-06, - "loss": 0.8465, + "epoch": 0.02, + "grad_norm": 1.8691772168200202, + "learning_rate": 5.886524822695035e-06, + "loss": 0.5319, "step": 249 }, { - "epoch": 0.03, - "grad_norm": 2.9152180387496465, - "learning_rate": 8.741258741258743e-06, - "loss": 0.8588, + "epoch": 0.02, + "grad_norm": 2.6151822742700492, + "learning_rate": 5.91016548463357e-06, + "loss": 0.647, "step": 250 }, { - "epoch": 0.03, - "grad_norm": 3.2233086259580013, - "learning_rate": 8.776223776223777e-06, - "loss": 0.7794, + "epoch": 0.02, + "grad_norm": 2.4267246005298952, + "learning_rate": 5.933806146572105e-06, + "loss": 0.5778, "step": 251 }, { - "epoch": 0.03, - "grad_norm": 2.2240451951651212, - "learning_rate": 8.811188811188812e-06, - "loss": 0.6543, + "epoch": 0.02, + "grad_norm": 2.4965085672937533, + "learning_rate": 5.957446808510638e-06, + "loss": 0.6263, "step": 252 }, { - "epoch": 0.03, - "grad_norm": 3.9198696085904783, - "learning_rate": 8.846153846153847e-06, - "loss": 0.8946, + "epoch": 0.02, + "grad_norm": 1.9438471287419556, + "learning_rate": 5.981087470449173e-06, + "loss": 0.6042, "step": 253 }, { - "epoch": 0.03, - "grad_norm": 3.0548127678087447, - "learning_rate": 8.881118881118883e-06, - "loss": 0.8461, + "epoch": 0.02, + "grad_norm": 1.0685659390083897, + "learning_rate": 6.004728132387707e-06, + "loss": 0.4295, "step": 254 }, { - "epoch": 0.03, - "grad_norm": 3.0034454684442564, - "learning_rate": 8.916083916083916e-06, - "loss": 0.836, + "epoch": 0.02, + "grad_norm": 3.027350347918465, + "learning_rate": 6.028368794326241e-06, + "loss": 0.6276, "step": 255 }, { - "epoch": 0.03, - "grad_norm": 4.356298512009719, - "learning_rate": 8.951048951048951e-06, - "loss": 0.8044, + "epoch": 0.02, + "grad_norm": 2.229266355135027, + "learning_rate": 6.052009456264776e-06, + "loss": 0.595, "step": 256 }, { - "epoch": 0.03, - "grad_norm": 3.7719381130534395, - "learning_rate": 8.986013986013987e-06, - "loss": 0.8598, + "epoch": 0.02, + "grad_norm": 2.2132558520570584, + "learning_rate": 6.07565011820331e-06, + "loss": 0.5963, "step": 257 }, { - "epoch": 0.03, - "grad_norm": 2.961248303513742, - "learning_rate": 9.020979020979022e-06, - "loss": 0.8625, + "epoch": 0.02, + "grad_norm": 1.953861668104456, + "learning_rate": 6.099290780141844e-06, + "loss": 0.6, "step": 258 }, { - "epoch": 0.03, - "grad_norm": 2.494139526881577, - "learning_rate": 9.055944055944057e-06, - "loss": 0.8373, + "epoch": 0.02, + "grad_norm": 2.1426793603173016, + "learning_rate": 6.122931442080379e-06, + "loss": 0.6653, "step": 259 }, { - "epoch": 0.03, - "grad_norm": 11.62343900060472, - "learning_rate": 9.090909090909091e-06, - "loss": 0.8142, + "epoch": 0.02, + "grad_norm": 2.542720484713686, + "learning_rate": 6.146572104018913e-06, + "loss": 0.5929, "step": 260 }, { - "epoch": 0.03, - "grad_norm": 2.976610214894441, - "learning_rate": 9.125874125874126e-06, - "loss": 0.8383, + "epoch": 0.02, + "grad_norm": 3.3792015730201044, + "learning_rate": 6.170212765957447e-06, + "loss": 0.6423, "step": 261 }, { - "epoch": 0.03, - "grad_norm": 3.152334899926377, - "learning_rate": 9.160839160839162e-06, - "loss": 0.831, + "epoch": 0.02, + "grad_norm": 3.0640976142489302, + "learning_rate": 6.193853427895982e-06, + "loss": 0.5904, "step": 262 }, { - "epoch": 0.03, - "grad_norm": 2.943305109872976, - "learning_rate": 9.195804195804197e-06, - "loss": 0.7919, + "epoch": 0.02, + "grad_norm": 1.9350334199356625, + "learning_rate": 6.217494089834516e-06, + "loss": 0.6447, "step": 263 }, { - "epoch": 0.03, - "grad_norm": 1.6262854108938019, - "learning_rate": 9.230769230769232e-06, - "loss": 0.6016, + "epoch": 0.02, + "grad_norm": 2.3687732731010915, + "learning_rate": 6.24113475177305e-06, + "loss": 0.6821, "step": 264 }, { - "epoch": 0.03, - "grad_norm": 2.9820347523442305, - "learning_rate": 9.265734265734266e-06, - "loss": 0.842, + "epoch": 0.02, + "grad_norm": 0.8943668043857836, + "learning_rate": 6.264775413711585e-06, + "loss": 0.4209, "step": 265 }, { - "epoch": 0.03, - "grad_norm": 2.965150403672179, - "learning_rate": 9.300699300699301e-06, - "loss": 0.8197, + "epoch": 0.02, + "grad_norm": 2.1194396436028202, + "learning_rate": 6.288416075650119e-06, + "loss": 0.5754, "step": 266 }, { - "epoch": 0.03, - "grad_norm": 4.015217382657136, - "learning_rate": 9.335664335664337e-06, - "loss": 0.7963, + "epoch": 0.02, + "grad_norm": 2.0594708970722646, + "learning_rate": 6.312056737588653e-06, + "loss": 0.7548, "step": 267 }, { - "epoch": 0.03, - "grad_norm": 3.5798922752668783, - "learning_rate": 9.370629370629372e-06, - "loss": 0.8153, + "epoch": 0.02, + "grad_norm": 2.3497058443209657, + "learning_rate": 6.335697399527188e-06, + "loss": 0.6459, "step": 268 }, { - "epoch": 0.03, - "grad_norm": 3.150189471491218, - "learning_rate": 9.405594405594406e-06, - "loss": 0.7447, + "epoch": 0.02, + "grad_norm": 0.8400306351135198, + "learning_rate": 6.359338061465722e-06, + "loss": 0.4111, "step": 269 }, { - "epoch": 0.03, - "grad_norm": 2.8869071132238133, - "learning_rate": 9.44055944055944e-06, - "loss": 0.8291, + "epoch": 0.02, + "grad_norm": 2.929830230212635, + "learning_rate": 6.382978723404256e-06, + "loss": 0.6433, "step": 270 }, { - "epoch": 0.03, - "grad_norm": 5.031692940937883, - "learning_rate": 9.475524475524476e-06, - "loss": 0.7182, + "epoch": 0.02, + "grad_norm": 2.9675168606594404, + "learning_rate": 6.40661938534279e-06, + "loss": 0.64, "step": 271 }, { - "epoch": 0.03, - "grad_norm": 3.3293803718839365, - "learning_rate": 9.510489510489511e-06, - "loss": 0.7867, + "epoch": 0.02, + "grad_norm": 1.9387238360710977, + "learning_rate": 6.430260047281325e-06, + "loss": 0.6548, "step": 272 }, { - "epoch": 0.03, - "grad_norm": 3.411178547420612, - "learning_rate": 9.545454545454547e-06, - "loss": 0.849, + "epoch": 0.02, + "grad_norm": 1.866164478305682, + "learning_rate": 6.453900709219859e-06, + "loss": 0.628, "step": 273 }, { - "epoch": 0.03, - "grad_norm": 1.8859064509058623, - "learning_rate": 9.58041958041958e-06, - "loss": 0.5875, + "epoch": 0.02, + "grad_norm": 0.884236064552695, + "learning_rate": 6.477541371158393e-06, + "loss": 0.4411, "step": 274 }, { - "epoch": 0.03, - "grad_norm": 3.9199928735928666, - "learning_rate": 9.615384615384616e-06, - "loss": 0.8625, + "epoch": 0.02, + "grad_norm": 2.55431358080111, + "learning_rate": 6.501182033096928e-06, + "loss": 0.631, "step": 275 }, { - "epoch": 0.03, - "grad_norm": 3.4204022429701837, - "learning_rate": 9.650349650349651e-06, - "loss": 0.787, + "epoch": 0.02, + "grad_norm": 2.319321607416945, + "learning_rate": 6.524822695035462e-06, + "loss": 0.6148, "step": 276 }, { - "epoch": 0.03, - "grad_norm": 3.280881830881753, - "learning_rate": 9.685314685314686e-06, - "loss": 0.8484, + "epoch": 0.02, + "grad_norm": 2.117724538033564, + "learning_rate": 6.548463356973995e-06, + "loss": 0.6242, "step": 277 }, { - "epoch": 0.03, - "grad_norm": 3.9903254168619435, - "learning_rate": 9.72027972027972e-06, - "loss": 0.8444, + "epoch": 0.02, + "grad_norm": 0.87811584005566, + "learning_rate": 6.57210401891253e-06, + "loss": 0.4284, "step": 278 }, { - "epoch": 0.03, - "grad_norm": 3.2907073267044566, - "learning_rate": 9.755244755244755e-06, - "loss": 0.7562, + "epoch": 0.02, + "grad_norm": 1.9310970615008756, + "learning_rate": 6.595744680851064e-06, + "loss": 0.5737, "step": 279 }, { - "epoch": 0.03, - "grad_norm": 3.454956915827184, - "learning_rate": 9.79020979020979e-06, - "loss": 0.7811, + "epoch": 0.02, + "grad_norm": 2.955230745343112, + "learning_rate": 6.619385342789598e-06, + "loss": 0.6007, "step": 280 }, { - "epoch": 0.03, - "grad_norm": 4.564600921480734, - "learning_rate": 9.825174825174826e-06, - "loss": 0.7783, + "epoch": 0.02, + "grad_norm": 6.078262572457781, + "learning_rate": 6.643026004728133e-06, + "loss": 0.6707, "step": 281 }, { - "epoch": 0.03, - "grad_norm": 3.0241945476082597, - "learning_rate": 9.860139860139861e-06, - "loss": 0.8345, + "epoch": 0.02, + "grad_norm": 2.696540506411694, + "learning_rate": 6.666666666666667e-06, + "loss": 0.6274, "step": 282 }, { - "epoch": 0.03, - "grad_norm": 3.548190169203221, - "learning_rate": 9.895104895104895e-06, - "loss": 0.7788, + "epoch": 0.02, + "grad_norm": 2.1264118683709414, + "learning_rate": 6.690307328605201e-06, + "loss": 0.5839, "step": 283 }, { - "epoch": 0.03, - "grad_norm": 4.732400299622023, - "learning_rate": 9.93006993006993e-06, - "loss": 0.7922, + "epoch": 0.02, + "grad_norm": 1.9635434754522751, + "learning_rate": 6.713947990543736e-06, + "loss": 0.6305, "step": 284 }, { - "epoch": 0.03, - "grad_norm": 2.878974528064686, - "learning_rate": 9.965034965034966e-06, - "loss": 0.8064, + "epoch": 0.02, + "grad_norm": 2.8179530548994656, + "learning_rate": 6.73758865248227e-06, + "loss": 0.6109, "step": 285 }, { - "epoch": 0.03, - "grad_norm": 4.264526689698259, - "learning_rate": 1e-05, - "loss": 0.8588, + "epoch": 0.02, + "grad_norm": 2.147283340542664, + "learning_rate": 6.761229314420804e-06, + "loss": 0.6575, "step": 286 }, { - "epoch": 0.03, - "grad_norm": 3.20198908359177, - "learning_rate": 9.999999709557228e-06, - "loss": 0.8367, + "epoch": 0.02, + "grad_norm": 2.046292573560414, + "learning_rate": 6.784869976359338e-06, + "loss": 0.6166, "step": 287 }, { - "epoch": 0.03, - "grad_norm": 20.715771128490395, - "learning_rate": 9.999998838228941e-06, - "loss": 0.7669, + "epoch": 0.02, + "grad_norm": 2.2349979813086063, + "learning_rate": 6.808510638297873e-06, + "loss": 0.5921, "step": 288 }, { - "epoch": 0.03, - "grad_norm": 3.3540036271091456, - "learning_rate": 9.999997386015244e-06, - "loss": 0.863, + "epoch": 0.02, + "grad_norm": 6.011143874573497, + "learning_rate": 6.832151300236407e-06, + "loss": 0.6377, "step": 289 }, { - "epoch": 0.03, - "grad_norm": 3.1848307160272125, - "learning_rate": 9.999995352916303e-06, - "loss": 0.7439, + "epoch": 0.02, + "grad_norm": 1.7149223117749663, + "learning_rate": 6.855791962174941e-06, + "loss": 0.5793, "step": 290 }, { - "epoch": 0.03, - "grad_norm": 3.152572478387918, - "learning_rate": 9.999992738932357e-06, - "loss": 0.822, + "epoch": 0.02, + "grad_norm": 2.6870360084163663, + "learning_rate": 6.879432624113476e-06, + "loss": 0.6359, "step": 291 }, { - "epoch": 0.03, - "grad_norm": 3.5369549798522573, - "learning_rate": 9.999989544063708e-06, - "loss": 0.7687, + "epoch": 0.02, + "grad_norm": 2.509501069401399, + "learning_rate": 6.90307328605201e-06, + "loss": 0.6048, "step": 292 }, { - "epoch": 0.03, - "grad_norm": 1.5501867277358583, - "learning_rate": 9.999985768310726e-06, - "loss": 0.6133, + "epoch": 0.02, + "grad_norm": 2.2675136287944646, + "learning_rate": 6.926713947990544e-06, + "loss": 0.6566, "step": 293 }, { - "epoch": 0.03, - "grad_norm": 3.319103455407901, - "learning_rate": 9.99998141167385e-06, - "loss": 0.7729, + "epoch": 0.02, + "grad_norm": 2.30047382654215, + "learning_rate": 6.950354609929079e-06, + "loss": 0.6292, "step": 294 }, { - "epoch": 0.03, - "grad_norm": 3.687148563168577, - "learning_rate": 9.999976474153589e-06, - "loss": 0.8048, + "epoch": 0.02, + "grad_norm": 2.0424336013253064, + "learning_rate": 6.973995271867613e-06, + "loss": 0.6222, "step": 295 }, { - "epoch": 0.03, - "grad_norm": 2.984437946192857, - "learning_rate": 9.999970955750516e-06, - "loss": 0.7737, + "epoch": 0.02, + "grad_norm": 2.0412213008724183, + "learning_rate": 6.997635933806147e-06, + "loss": 0.5894, "step": 296 }, { - "epoch": 0.03, - "grad_norm": 4.5653321300359915, - "learning_rate": 9.999964856465268e-06, - "loss": 0.7832, + "epoch": 0.02, + "grad_norm": 3.3417543290975713, + "learning_rate": 7.021276595744682e-06, + "loss": 0.5642, "step": 297 }, { - "epoch": 0.03, - "grad_norm": 3.6017277018504, - "learning_rate": 9.999958176298559e-06, - "loss": 0.8802, + "epoch": 0.02, + "grad_norm": 2.403106512160002, + "learning_rate": 7.044917257683216e-06, + "loss": 0.5829, "step": 298 }, { - "epoch": 0.03, - "grad_norm": 3.4928074350110396, - "learning_rate": 9.99995091525116e-06, - "loss": 0.8067, + "epoch": 0.02, + "grad_norm": 2.090566868722372, + "learning_rate": 7.06855791962175e-06, + "loss": 0.6031, "step": 299 }, { - "epoch": 0.03, - "grad_norm": 2.7767296913858037, - "learning_rate": 9.999943073323919e-06, - "loss": 0.7946, + "epoch": 0.02, + "grad_norm": 2.1777723014145836, + "learning_rate": 7.092198581560285e-06, + "loss": 0.5388, "step": 300 }, { - "epoch": 0.03, - "grad_norm": 3.560126265511432, - "learning_rate": 9.999934650517743e-06, - "loss": 0.7549, + "epoch": 0.02, + "grad_norm": 1.748322187303465, + "learning_rate": 7.115839243498818e-06, + "loss": 0.6316, "step": 301 }, { - "epoch": 0.03, - "grad_norm": 3.1779121129553207, - "learning_rate": 9.999925646833614e-06, - "loss": 0.8234, + "epoch": 0.02, + "grad_norm": 2.3467489343001207, + "learning_rate": 7.139479905437353e-06, + "loss": 0.5795, "step": 302 }, { - "epoch": 0.03, - "grad_norm": 2.6269027998737364, - "learning_rate": 9.999916062272576e-06, - "loss": 0.747, + "epoch": 0.02, + "grad_norm": 2.6047512781678734, + "learning_rate": 7.163120567375888e-06, + "loss": 0.6116, "step": 303 }, { - "epoch": 0.03, - "grad_norm": 3.0096315209518854, - "learning_rate": 9.999905896835745e-06, - "loss": 0.8391, + "epoch": 0.02, + "grad_norm": 2.1125782871518317, + "learning_rate": 7.186761229314421e-06, + "loss": 0.644, "step": 304 }, { - "epoch": 0.03, - "grad_norm": 3.6570640261588303, - "learning_rate": 9.999895150524297e-06, - "loss": 0.8514, + "epoch": 0.02, + "grad_norm": 1.993291878592755, + "learning_rate": 7.210401891252956e-06, + "loss": 0.6822, "step": 305 }, { - "epoch": 0.03, - "grad_norm": 2.7168969428743295, - "learning_rate": 9.999883823339487e-06, - "loss": 0.7491, + "epoch": 0.02, + "grad_norm": 2.019692812487218, + "learning_rate": 7.234042553191491e-06, + "loss": 0.6087, "step": 306 }, { - "epoch": 0.03, - "grad_norm": 2.929508346366845, - "learning_rate": 9.999871915282625e-06, - "loss": 0.8162, + "epoch": 0.02, + "grad_norm": 2.336444997291636, + "learning_rate": 7.257683215130024e-06, + "loss": 0.5787, "step": 307 }, { - "epoch": 0.03, - "grad_norm": 3.431569543759362, - "learning_rate": 9.999859426355098e-06, - "loss": 0.8174, + "epoch": 0.02, + "grad_norm": 2.0850134094412023, + "learning_rate": 7.281323877068559e-06, + "loss": 0.6551, "step": 308 }, { - "epoch": 0.03, - "grad_norm": 3.6342519486169595, - "learning_rate": 9.999846356558356e-06, - "loss": 0.8412, + "epoch": 0.02, + "grad_norm": 2.4225300082135353, + "learning_rate": 7.304964539007094e-06, + "loss": 0.6174, "step": 309 }, { - "epoch": 0.03, - "grad_norm": 2.773720032260141, - "learning_rate": 9.999832705893919e-06, - "loss": 0.8367, + "epoch": 0.02, + "grad_norm": 2.583401647353592, + "learning_rate": 7.3286052009456266e-06, + "loss": 0.6061, "step": 310 }, { - "epoch": 0.03, - "grad_norm": 2.9812086033455607, - "learning_rate": 9.999818474363368e-06, - "loss": 0.8288, + "epoch": 0.02, + "grad_norm": 2.6144572594636575, + "learning_rate": 7.352245862884161e-06, + "loss": 0.6351, "step": 311 }, { - "epoch": 0.03, - "grad_norm": 3.9790057425352723, - "learning_rate": 9.999803661968361e-06, - "loss": 0.8828, + "epoch": 0.02, + "grad_norm": 2.34160821928125, + "learning_rate": 7.375886524822695e-06, + "loss": 0.6232, "step": 312 }, { - "epoch": 0.03, - "grad_norm": 3.06724404084709, - "learning_rate": 9.999788268710619e-06, - "loss": 0.8277, + "epoch": 0.02, + "grad_norm": 1.8616464175081722, + "learning_rate": 7.3995271867612296e-06, + "loss": 0.5718, "step": 313 }, { - "epoch": 0.03, - "grad_norm": 3.020362580767787, - "learning_rate": 9.999772294591927e-06, - "loss": 0.7538, + "epoch": 0.02, + "grad_norm": 1.8393024740834645, + "learning_rate": 7.423167848699764e-06, + "loss": 0.634, "step": 314 }, { - "epoch": 0.03, - "grad_norm": 3.4612261352729305, - "learning_rate": 9.999755739614144e-06, - "loss": 0.7895, + "epoch": 0.02, + "grad_norm": 1.984175163360553, + "learning_rate": 7.446808510638298e-06, + "loss": 0.6588, "step": 315 }, { - "epoch": 0.03, - "grad_norm": 3.272614641342335, - "learning_rate": 9.999738603779192e-06, - "loss": 0.6866, + "epoch": 0.02, + "grad_norm": 4.970645907307493, + "learning_rate": 7.4704491725768326e-06, + "loss": 0.5868, "step": 316 }, { - "epoch": 0.03, - "grad_norm": 1.7980881010106364, - "learning_rate": 9.999720887089062e-06, - "loss": 0.6429, + "epoch": 0.02, + "grad_norm": 2.170123670941602, + "learning_rate": 7.494089834515367e-06, + "loss": 0.5987, "step": 317 }, { - "epoch": 0.03, - "grad_norm": 3.3263471950749786, - "learning_rate": 9.99970258954581e-06, - "loss": 0.8188, + "epoch": 0.02, + "grad_norm": 2.291316350579009, + "learning_rate": 7.517730496453901e-06, + "loss": 0.5691, "step": 318 }, { - "epoch": 0.03, - "grad_norm": 3.0088862503780187, - "learning_rate": 9.999683711151565e-06, - "loss": 0.7195, + "epoch": 0.02, + "grad_norm": 1.9967141003084057, + "learning_rate": 7.5413711583924356e-06, + "loss": 0.585, "step": 319 }, { - "epoch": 0.03, - "grad_norm": 2.87971268033177, - "learning_rate": 9.99966425190852e-06, - "loss": 0.8251, + "epoch": 0.02, + "grad_norm": 2.205025391781694, + "learning_rate": 7.565011820330969e-06, + "loss": 0.7242, "step": 320 }, { - "epoch": 0.03, - "grad_norm": 2.8366240584831233, - "learning_rate": 9.999644211818934e-06, - "loss": 0.8274, + "epoch": 0.02, + "grad_norm": 2.143973978339484, + "learning_rate": 7.588652482269504e-06, + "loss": 0.5935, "step": 321 }, { - "epoch": 0.03, - "grad_norm": 3.3752232102724986, - "learning_rate": 9.999623590885135e-06, - "loss": 0.7519, + "epoch": 0.02, + "grad_norm": 0.9475879315260549, + "learning_rate": 7.6122931442080386e-06, + "loss": 0.4418, "step": 322 }, { - "epoch": 0.03, - "grad_norm": 3.248073936944215, - "learning_rate": 9.999602389109521e-06, - "loss": 0.8161, + "epoch": 0.02, + "grad_norm": 0.94530812902598, + "learning_rate": 7.635933806146573e-06, + "loss": 0.446, "step": 323 }, { - "epoch": 0.03, - "grad_norm": 2.7887278560705093, - "learning_rate": 9.999580606494554e-06, - "loss": 0.8526, + "epoch": 0.02, + "grad_norm": 1.8266913056807288, + "learning_rate": 7.659574468085107e-06, + "loss": 0.7063, "step": 324 }, { - "epoch": 0.03, - "grad_norm": 2.6703840967870525, - "learning_rate": 9.999558243042763e-06, - "loss": 0.763, + "epoch": 0.02, + "grad_norm": 2.463706612258893, + "learning_rate": 7.68321513002364e-06, + "loss": 0.5881, "step": 325 }, { - "epoch": 0.03, - "grad_norm": 3.653691408190642, - "learning_rate": 9.999535298756749e-06, - "loss": 0.8728, + "epoch": 0.02, + "grad_norm": 2.604791553022805, + "learning_rate": 7.706855791962176e-06, + "loss": 0.6763, "step": 326 }, { - "epoch": 0.03, - "grad_norm": 2.6557511065861665, - "learning_rate": 9.999511773639177e-06, - "loss": 0.8372, + "epoch": 0.02, + "grad_norm": 1.8924654838469481, + "learning_rate": 7.73049645390071e-06, + "loss": 0.6021, "step": 327 }, { - "epoch": 0.03, - "grad_norm": 2.9851651458699573, - "learning_rate": 9.999487667692778e-06, - "loss": 0.8749, + "epoch": 0.02, + "grad_norm": 1.6238009898913615, + "learning_rate": 7.754137115839244e-06, + "loss": 0.5749, "step": 328 }, { - "epoch": 0.03, - "grad_norm": 3.556775744280704, - "learning_rate": 9.999462980920353e-06, - "loss": 0.9202, + "epoch": 0.02, + "grad_norm": 2.0355786303738426, + "learning_rate": 7.77777777777778e-06, + "loss": 0.5809, "step": 329 }, { - "epoch": 0.03, - "grad_norm": 3.1780645043985722, - "learning_rate": 9.99943771332477e-06, - "loss": 0.7793, + "epoch": 0.02, + "grad_norm": 1.9207474085031437, + "learning_rate": 7.801418439716313e-06, + "loss": 0.5797, "step": 330 }, { - "epoch": 0.03, - "grad_norm": 2.50054261428372, - "learning_rate": 9.999411864908967e-06, - "loss": 0.7445, + "epoch": 0.02, + "grad_norm": 2.1143960443693732, + "learning_rate": 7.825059101654847e-06, + "loss": 0.6487, "step": 331 }, { - "epoch": 0.03, - "grad_norm": 8.416951605036898, - "learning_rate": 9.999385435675947e-06, - "loss": 0.7777, + "epoch": 0.02, + "grad_norm": 2.033327914147217, + "learning_rate": 7.848699763593382e-06, + "loss": 0.6823, "step": 332 }, { - "epoch": 0.04, - "grad_norm": 1.7297205360660906, - "learning_rate": 9.999358425628777e-06, - "loss": 0.6569, + "epoch": 0.02, + "grad_norm": 2.1158852684356924, + "learning_rate": 7.872340425531916e-06, + "loss": 0.6055, "step": 333 }, { - "epoch": 0.04, - "grad_norm": 2.763919036592778, - "learning_rate": 9.999330834770598e-06, - "loss": 0.7999, + "epoch": 0.02, + "grad_norm": 1.880102422329663, + "learning_rate": 7.89598108747045e-06, + "loss": 0.6515, "step": 334 }, { - "epoch": 0.04, - "grad_norm": 3.3468910031579515, - "learning_rate": 9.999302663104611e-06, - "loss": 0.8996, + "epoch": 0.02, + "grad_norm": 1.9025149898387945, + "learning_rate": 7.919621749408985e-06, + "loss": 0.5768, "step": 335 }, { - "epoch": 0.04, - "grad_norm": 2.52656655880427, - "learning_rate": 9.999273910634095e-06, - "loss": 0.8481, + "epoch": 0.02, + "grad_norm": 2.1549472944395816, + "learning_rate": 7.943262411347519e-06, + "loss": 0.6651, "step": 336 }, { - "epoch": 0.04, - "grad_norm": 3.0210600507512853, - "learning_rate": 9.999244577362388e-06, - "loss": 0.7786, + "epoch": 0.02, + "grad_norm": 1.7436315644493636, + "learning_rate": 7.966903073286053e-06, + "loss": 0.6324, "step": 337 }, { - "epoch": 0.04, - "grad_norm": 2.449552196852401, - "learning_rate": 9.999214663292896e-06, - "loss": 0.7566, + "epoch": 0.02, + "grad_norm": 1.7910781595848788, + "learning_rate": 7.990543735224588e-06, + "loss": 0.6185, "step": 338 }, { - "epoch": 0.04, - "grad_norm": 3.5228157643635942, - "learning_rate": 9.999184168429095e-06, - "loss": 0.8273, + "epoch": 0.02, + "grad_norm": 2.671234577697105, + "learning_rate": 8.014184397163122e-06, + "loss": 0.6658, "step": 339 }, { - "epoch": 0.04, - "grad_norm": 2.8127747250358417, - "learning_rate": 9.99915309277453e-06, - "loss": 0.7628, + "epoch": 0.02, + "grad_norm": 3.1615541550364177, + "learning_rate": 8.037825059101656e-06, + "loss": 0.5478, "step": 340 }, { - "epoch": 0.04, - "grad_norm": 2.9561302000674052, - "learning_rate": 9.999121436332809e-06, - "loss": 0.7372, + "epoch": 0.02, + "grad_norm": 1.1265840907786644, + "learning_rate": 8.061465721040191e-06, + "loss": 0.441, "step": 341 }, { - "epoch": 0.04, - "grad_norm": 2.635610731073348, - "learning_rate": 9.99908919910761e-06, - "loss": 0.8471, + "epoch": 0.02, + "grad_norm": 3.6930899183772192, + "learning_rate": 8.085106382978723e-06, + "loss": 0.7241, "step": 342 }, { - "epoch": 0.04, - "grad_norm": 2.974938379443744, - "learning_rate": 9.99905638110268e-06, - "loss": 0.8192, + "epoch": 0.02, + "grad_norm": 2.1981823695190608, + "learning_rate": 8.108747044917257e-06, + "loss": 0.5729, "step": 343 }, { - "epoch": 0.04, - "grad_norm": 3.7322849018695665, - "learning_rate": 9.99902298232183e-06, - "loss": 0.8051, + "epoch": 0.02, + "grad_norm": 2.1312927140441005, + "learning_rate": 8.132387706855792e-06, + "loss": 0.5973, "step": 344 }, { - "epoch": 0.04, - "grad_norm": 2.989115971364437, - "learning_rate": 9.998989002768939e-06, - "loss": 0.7731, + "epoch": 0.02, + "grad_norm": 2.557075663219547, + "learning_rate": 8.156028368794326e-06, + "loss": 0.5948, "step": 345 }, { - "epoch": 0.04, - "grad_norm": 2.7573253684038, - "learning_rate": 9.99895444244796e-06, - "loss": 0.7253, + "epoch": 0.02, + "grad_norm": 0.8753559873805663, + "learning_rate": 8.17966903073286e-06, + "loss": 0.4182, "step": 346 }, { - "epoch": 0.04, - "grad_norm": 3.0060806455923066, - "learning_rate": 9.998919301362902e-06, - "loss": 0.8549, + "epoch": 0.02, + "grad_norm": 2.3843164064101305, + "learning_rate": 8.203309692671395e-06, + "loss": 0.7099, "step": 347 }, { - "epoch": 0.04, - "grad_norm": 2.813781196605346, - "learning_rate": 9.998883579517849e-06, - "loss": 0.8518, + "epoch": 0.02, + "grad_norm": 2.226871693466023, + "learning_rate": 8.22695035460993e-06, + "loss": 0.5838, "step": 348 }, { - "epoch": 0.04, - "grad_norm": 3.0126862479542558, - "learning_rate": 9.998847276916953e-06, - "loss": 0.7217, + "epoch": 0.02, + "grad_norm": 2.1076826836049203, + "learning_rate": 8.250591016548463e-06, + "loss": 0.6022, "step": 349 }, { - "epoch": 0.04, - "grad_norm": 2.5842429131791644, - "learning_rate": 9.99881039356443e-06, - "loss": 0.8056, + "epoch": 0.02, + "grad_norm": 2.380663233509458, + "learning_rate": 8.274231678486998e-06, + "loss": 0.6895, "step": 350 }, { - "epoch": 0.04, - "grad_norm": 3.153516334642533, - "learning_rate": 9.998772929464567e-06, - "loss": 0.827, + "epoch": 0.02, + "grad_norm": 2.1910461963761962, + "learning_rate": 8.297872340425532e-06, + "loss": 0.6299, "step": 351 }, { - "epoch": 0.04, - "grad_norm": 2.955277320770189, - "learning_rate": 9.998734884621714e-06, - "loss": 0.8264, + "epoch": 0.02, + "grad_norm": 2.1492939857674713, + "learning_rate": 8.321513002364066e-06, + "loss": 0.6809, "step": 352 }, { - "epoch": 0.04, - "grad_norm": 2.3636272885829444, - "learning_rate": 9.998696259040292e-06, - "loss": 0.8436, + "epoch": 0.03, + "grad_norm": 2.04669671603823, + "learning_rate": 8.345153664302601e-06, + "loss": 0.5487, "step": 353 }, { - "epoch": 0.04, - "grad_norm": 2.6445985660522586, - "learning_rate": 9.99865705272479e-06, - "loss": 0.7755, + "epoch": 0.03, + "grad_norm": 3.193631763129984, + "learning_rate": 8.368794326241135e-06, + "loss": 0.6054, "step": 354 }, { - "epoch": 0.04, - "grad_norm": 3.237081162467692, - "learning_rate": 9.99861726567976e-06, - "loss": 0.7811, + "epoch": 0.03, + "grad_norm": 2.07281163950364, + "learning_rate": 8.392434988179669e-06, + "loss": 0.5948, "step": 355 }, { - "epoch": 0.04, - "grad_norm": 2.6981850184698, - "learning_rate": 9.998576897909826e-06, - "loss": 0.7442, + "epoch": 0.03, + "grad_norm": 2.4745230634498143, + "learning_rate": 8.416075650118204e-06, + "loss": 0.6859, "step": 356 }, { - "epoch": 0.04, - "grad_norm": 3.1062173431704796, - "learning_rate": 9.998535949419676e-06, - "loss": 0.8203, + "epoch": 0.03, + "grad_norm": 0.8582869660609473, + "learning_rate": 8.439716312056738e-06, + "loss": 0.4304, "step": 357 }, { - "epoch": 0.04, - "grad_norm": 3.337004723227758, - "learning_rate": 9.99849442021407e-06, - "loss": 0.8208, + "epoch": 0.03, + "grad_norm": 2.2129767939103395, + "learning_rate": 8.463356973995272e-06, + "loss": 0.5606, "step": 358 }, { - "epoch": 0.04, - "grad_norm": 7.14701609390736, - "learning_rate": 9.99845231029783e-06, - "loss": 0.8006, + "epoch": 0.03, + "grad_norm": 2.263933634811897, + "learning_rate": 8.486997635933807e-06, + "loss": 0.6006, "step": 359 }, { - "epoch": 0.04, - "grad_norm": 2.19324654121165, - "learning_rate": 9.998409619675852e-06, - "loss": 0.8317, + "epoch": 0.03, + "grad_norm": 1.9323600318164587, + "learning_rate": 8.510638297872341e-06, + "loss": 0.6188, "step": 360 }, { - "epoch": 0.04, - "grad_norm": 3.676166242806993, - "learning_rate": 9.998366348353092e-06, - "loss": 0.7146, + "epoch": 0.03, + "grad_norm": 4.005975598107289, + "learning_rate": 8.534278959810875e-06, + "loss": 0.5893, "step": 361 }, { - "epoch": 0.04, - "grad_norm": 1.7837495615392405, - "learning_rate": 9.998322496334579e-06, - "loss": 0.6307, + "epoch": 0.03, + "grad_norm": 2.127423157847264, + "learning_rate": 8.55791962174941e-06, + "loss": 0.5931, "step": 362 }, { - "epoch": 0.04, - "grad_norm": 2.6924764025176575, - "learning_rate": 9.998278063625407e-06, - "loss": 0.788, + "epoch": 0.03, + "grad_norm": 2.1910497480451565, + "learning_rate": 8.581560283687944e-06, + "loss": 0.6218, "step": 363 }, { - "epoch": 0.04, - "grad_norm": 2.771669821577446, - "learning_rate": 9.998233050230737e-06, - "loss": 0.7719, + "epoch": 0.03, + "grad_norm": 2.1184685435170785, + "learning_rate": 8.605200945626478e-06, + "loss": 0.6796, "step": 364 }, { - "epoch": 0.04, - "grad_norm": 2.7022314056519003, - "learning_rate": 9.9981874561558e-06, - "loss": 0.7276, + "epoch": 0.03, + "grad_norm": 1.8487136254519403, + "learning_rate": 8.628841607565013e-06, + "loss": 0.6597, "step": 365 }, { - "epoch": 0.04, - "grad_norm": 3.4443745382241926, - "learning_rate": 9.998141281405892e-06, - "loss": 0.7159, + "epoch": 0.03, + "grad_norm": 0.9038437362243, + "learning_rate": 8.652482269503547e-06, + "loss": 0.4437, "step": 366 }, { - "epoch": 0.04, - "grad_norm": 2.638255448321009, - "learning_rate": 9.99809452598638e-06, - "loss": 0.8132, + "epoch": 0.03, + "grad_norm": 2.5798214198073794, + "learning_rate": 8.676122931442081e-06, + "loss": 0.5401, "step": 367 }, { - "epoch": 0.04, - "grad_norm": 3.342150185068803, - "learning_rate": 9.998047189902693e-06, - "loss": 0.7607, + "epoch": 0.03, + "grad_norm": 2.3925206905682925, + "learning_rate": 8.699763593380616e-06, + "loss": 0.5959, "step": 368 }, { - "epoch": 0.04, - "grad_norm": 2.415218070936139, - "learning_rate": 9.997999273160333e-06, - "loss": 0.7152, + "epoch": 0.03, + "grad_norm": 1.9968658815156681, + "learning_rate": 8.72340425531915e-06, + "loss": 0.6103, "step": 369 }, { - "epoch": 0.04, - "grad_norm": 4.393978658986462, - "learning_rate": 9.997950775764862e-06, - "loss": 0.8176, + "epoch": 0.03, + "grad_norm": 1.8336797492105232, + "learning_rate": 8.747044917257684e-06, + "loss": 0.6316, "step": 370 }, { - "epoch": 0.04, - "grad_norm": 2.590618917457282, - "learning_rate": 9.99790169772192e-06, - "loss": 0.7372, + "epoch": 0.03, + "grad_norm": 2.529576850073482, + "learning_rate": 8.77068557919622e-06, + "loss": 0.6745, "step": 371 }, { - "epoch": 0.04, - "grad_norm": 3.4930700657768323, - "learning_rate": 9.997852039037206e-06, - "loss": 0.823, + "epoch": 0.03, + "grad_norm": 1.929251438954493, + "learning_rate": 8.794326241134753e-06, + "loss": 0.6582, "step": 372 }, { - "epoch": 0.04, - "grad_norm": 2.487379499776839, - "learning_rate": 9.99780179971649e-06, - "loss": 0.6554, + "epoch": 0.03, + "grad_norm": 1.8891939284759103, + "learning_rate": 8.817966903073287e-06, + "loss": 0.6353, "step": 373 }, { - "epoch": 0.04, - "grad_norm": 2.624919386906931, - "learning_rate": 9.997750979765606e-06, - "loss": 0.8252, + "epoch": 0.03, + "grad_norm": 2.2843901750783764, + "learning_rate": 8.84160756501182e-06, + "loss": 0.5779, "step": 374 }, { - "epoch": 0.04, - "grad_norm": 2.6531319266118483, - "learning_rate": 9.997699579190462e-06, - "loss": 0.9001, + "epoch": 0.03, + "grad_norm": 1.9352115203970868, + "learning_rate": 8.865248226950355e-06, + "loss": 0.6481, "step": 375 }, { - "epoch": 0.04, - "grad_norm": 2.769920692517521, - "learning_rate": 9.997647597997025e-06, - "loss": 0.8726, + "epoch": 0.03, + "grad_norm": 1.7573679707082033, + "learning_rate": 8.888888888888888e-06, + "loss": 0.6559, "step": 376 }, { - "epoch": 0.04, - "grad_norm": 2.8611168209874145, - "learning_rate": 9.997595036191338e-06, - "loss": 0.8799, + "epoch": 0.03, + "grad_norm": 2.6362787419821148, + "learning_rate": 8.912529550827424e-06, + "loss": 0.5897, "step": 377 }, { - "epoch": 0.04, - "grad_norm": 2.704800856083731, - "learning_rate": 9.997541893779507e-06, - "loss": 0.8497, + "epoch": 0.03, + "grad_norm": 2.6083820211608773, + "learning_rate": 8.936170212765958e-06, + "loss": 0.5808, "step": 378 }, { - "epoch": 0.04, - "grad_norm": 2.4415710077044612, - "learning_rate": 9.997488170767706e-06, - "loss": 0.8527, + "epoch": 0.03, + "grad_norm": 2.093429173347105, + "learning_rate": 8.959810874704491e-06, + "loss": 0.6066, "step": 379 }, { - "epoch": 0.04, - "grad_norm": 3.050991550010565, - "learning_rate": 9.997433867162174e-06, - "loss": 0.7829, + "epoch": 0.03, + "grad_norm": 1.8522287764911876, + "learning_rate": 8.983451536643027e-06, + "loss": 0.6136, "step": 380 }, { - "epoch": 0.04, - "grad_norm": 2.869557820847034, - "learning_rate": 9.997378982969223e-06, - "loss": 0.8413, + "epoch": 0.03, + "grad_norm": 1.9388692480217478, + "learning_rate": 9.00709219858156e-06, + "loss": 0.6412, "step": 381 }, { - "epoch": 0.04, - "grad_norm": 2.5652433421128276, - "learning_rate": 9.997323518195227e-06, - "loss": 0.7852, + "epoch": 0.03, + "grad_norm": 1.1316123287352875, + "learning_rate": 9.030732860520094e-06, + "loss": 0.4386, "step": 382 }, { - "epoch": 0.04, - "grad_norm": 3.0907376752843794, - "learning_rate": 9.99726747284663e-06, - "loss": 0.8476, + "epoch": 0.03, + "grad_norm": 0.879938641379592, + "learning_rate": 9.05437352245863e-06, + "loss": 0.4433, "step": 383 }, { - "epoch": 0.04, - "grad_norm": 2.576052254739924, - "learning_rate": 9.997210846929945e-06, - "loss": 0.7156, + "epoch": 0.03, + "grad_norm": 2.69067768010082, + "learning_rate": 9.078014184397164e-06, + "loss": 0.5895, "step": 384 }, { - "epoch": 0.04, - "grad_norm": 2.957239805878372, - "learning_rate": 9.997153640451748e-06, - "loss": 0.7981, + "epoch": 0.03, + "grad_norm": 2.0209256009242886, + "learning_rate": 9.101654846335697e-06, + "loss": 0.5338, "step": 385 }, { - "epoch": 0.04, - "grad_norm": 2.406716857617946, - "learning_rate": 9.997095853418685e-06, - "loss": 0.7176, + "epoch": 0.03, + "grad_norm": 1.0397709911903443, + "learning_rate": 9.125295508274233e-06, + "loss": 0.4457, "step": 386 }, { - "epoch": 0.04, - "grad_norm": 2.5783679058938773, - "learning_rate": 9.997037485837474e-06, - "loss": 0.8001, + "epoch": 0.03, + "grad_norm": 1.8782836923334245, + "learning_rate": 9.148936170212767e-06, + "loss": 0.6204, "step": 387 }, { - "epoch": 0.04, - "grad_norm": 3.2799386939664985, - "learning_rate": 9.996978537714891e-06, - "loss": 0.7861, + "epoch": 0.03, + "grad_norm": 1.8481548702128392, + "learning_rate": 9.1725768321513e-06, + "loss": 0.5851, "step": 388 }, { - "epoch": 0.04, - "grad_norm": 2.6320652922884773, - "learning_rate": 9.996919009057787e-06, - "loss": 0.844, + "epoch": 0.03, + "grad_norm": 2.049637721396946, + "learning_rate": 9.196217494089836e-06, + "loss": 0.6538, "step": 389 }, { - "epoch": 0.04, - "grad_norm": 3.079074473308074, - "learning_rate": 9.996858899873076e-06, - "loss": 0.8376, + "epoch": 0.03, + "grad_norm": 1.8603948156072645, + "learning_rate": 9.21985815602837e-06, + "loss": 0.5481, "step": 390 }, { - "epoch": 0.04, - "grad_norm": 2.810909666983869, - "learning_rate": 9.996798210167745e-06, - "loss": 0.8102, + "epoch": 0.03, + "grad_norm": 2.068030725370351, + "learning_rate": 9.243498817966903e-06, + "loss": 0.6356, "step": 391 }, { - "epoch": 0.04, - "grad_norm": 2.822002229623917, - "learning_rate": 9.996736939948838e-06, - "loss": 0.7358, + "epoch": 0.03, + "grad_norm": 2.6869762348526804, + "learning_rate": 9.267139479905439e-06, + "loss": 0.5658, "step": 392 }, { - "epoch": 0.04, - "grad_norm": 2.553150551330481, - "learning_rate": 9.996675089223481e-06, - "loss": 0.8613, + "epoch": 0.03, + "grad_norm": 3.703195674165023, + "learning_rate": 9.290780141843973e-06, + "loss": 0.6712, "step": 393 }, { - "epoch": 0.04, - "grad_norm": 2.6973816176709278, - "learning_rate": 9.996612657998856e-06, - "loss": 0.7688, + "epoch": 0.03, + "grad_norm": 2.146561298913722, + "learning_rate": 9.314420803782506e-06, + "loss": 0.5824, "step": 394 }, { - "epoch": 0.04, - "grad_norm": 3.017782068668808, - "learning_rate": 9.996549646282214e-06, - "loss": 0.8728, + "epoch": 0.03, + "grad_norm": 1.8588813054324174, + "learning_rate": 9.338061465721042e-06, + "loss": 0.5665, "step": 395 }, { - "epoch": 0.04, - "grad_norm": 2.4233265237128307, - "learning_rate": 9.99648605408088e-06, - "loss": 0.7584, + "epoch": 0.03, + "grad_norm": 2.4400727930026456, + "learning_rate": 9.361702127659576e-06, + "loss": 0.5711, "step": 396 }, { - "epoch": 0.04, - "grad_norm": 2.5786786252897844, - "learning_rate": 9.996421881402238e-06, - "loss": 0.8619, + "epoch": 0.03, + "grad_norm": 2.420514059813741, + "learning_rate": 9.38534278959811e-06, + "loss": 0.7586, "step": 397 }, { - "epoch": 0.04, - "grad_norm": 2.733216464863266, - "learning_rate": 9.996357128253747e-06, - "loss": 0.7994, + "epoch": 0.03, + "grad_norm": 2.106911929269598, + "learning_rate": 9.408983451536645e-06, + "loss": 0.6493, "step": 398 }, { - "epoch": 0.04, - "grad_norm": 2.1790269474955832, - "learning_rate": 9.996291794642924e-06, - "loss": 0.7871, + "epoch": 0.03, + "grad_norm": 2.072530122310079, + "learning_rate": 9.432624113475179e-06, + "loss": 0.5177, "step": 399 }, { - "epoch": 0.04, - "grad_norm": 2.506896691185903, - "learning_rate": 9.996225880577366e-06, - "loss": 0.7843, + "epoch": 0.03, + "grad_norm": 1.9551347240993509, + "learning_rate": 9.456264775413712e-06, + "loss": 0.5946, "step": 400 }, { - "epoch": 0.04, - "grad_norm": 2.3733740763366336, - "learning_rate": 9.996159386064728e-06, - "loss": 0.743, + "epoch": 0.03, + "grad_norm": 2.409768171945308, + "learning_rate": 9.479905437352248e-06, + "loss": 0.5555, "step": 401 }, { - "epoch": 0.04, - "grad_norm": 2.3332932132080813, - "learning_rate": 9.996092311112734e-06, - "loss": 0.7422, + "epoch": 0.03, + "grad_norm": 1.9317505225205758, + "learning_rate": 9.503546099290782e-06, + "loss": 0.5903, "step": 402 }, { - "epoch": 0.04, - "grad_norm": 2.521593140693123, - "learning_rate": 9.996024655729177e-06, - "loss": 0.8187, + "epoch": 0.03, + "grad_norm": 1.9306826865626512, + "learning_rate": 9.527186761229315e-06, + "loss": 0.5887, "step": 403 }, { - "epoch": 0.04, - "grad_norm": 2.7935109625374253, - "learning_rate": 9.99595641992192e-06, - "loss": 0.8251, + "epoch": 0.03, + "grad_norm": 1.724141527162873, + "learning_rate": 9.55082742316785e-06, + "loss": 0.5717, "step": 404 }, { - "epoch": 0.04, - "grad_norm": 8.26279670060322, - "learning_rate": 9.995887603698886e-06, - "loss": 0.7706, + "epoch": 0.03, + "grad_norm": 1.129590676540281, + "learning_rate": 9.574468085106385e-06, + "loss": 0.4453, "step": 405 }, { - "epoch": 0.04, - "grad_norm": 2.41303802576548, - "learning_rate": 9.99581820706807e-06, - "loss": 0.7678, + "epoch": 0.03, + "grad_norm": 2.6993176113650215, + "learning_rate": 9.598108747044918e-06, + "loss": 0.6613, "step": 406 }, { - "epoch": 0.04, - "grad_norm": 3.0303506168562255, - "learning_rate": 9.99574823003754e-06, - "loss": 0.7319, + "epoch": 0.03, + "grad_norm": 2.0887868782976944, + "learning_rate": 9.621749408983452e-06, + "loss": 0.591, "step": 407 }, { - "epoch": 0.04, - "grad_norm": 3.486164975875137, - "learning_rate": 9.99567767261542e-06, - "loss": 0.8546, + "epoch": 0.03, + "grad_norm": 2.106431258762348, + "learning_rate": 9.645390070921986e-06, + "loss": 0.594, "step": 408 }, { - "epoch": 0.04, - "grad_norm": 2.9414654462050036, - "learning_rate": 9.995606534809909e-06, - "loss": 0.77, + "epoch": 0.03, + "grad_norm": 2.1784688426064713, + "learning_rate": 9.66903073286052e-06, + "loss": 0.6298, "step": 409 }, { - "epoch": 0.04, - "grad_norm": 2.5654915500442366, - "learning_rate": 9.995534816629271e-06, - "loss": 0.7913, + "epoch": 0.03, + "grad_norm": 2.1721541266727655, + "learning_rate": 9.692671394799055e-06, + "loss": 0.6505, "step": 410 }, { - "epoch": 0.04, - "grad_norm": 2.6295512034240995, - "learning_rate": 9.99546251808184e-06, - "loss": 0.8889, + "epoch": 0.03, + "grad_norm": 2.1809630033542047, + "learning_rate": 9.716312056737589e-06, + "loss": 0.6297, "step": 411 }, { - "epoch": 0.04, - "grad_norm": 2.789997301197883, - "learning_rate": 9.995389639176013e-06, - "loss": 0.848, + "epoch": 0.03, + "grad_norm": 0.8424817420723397, + "learning_rate": 9.739952718676123e-06, + "loss": 0.4238, "step": 412 }, { - "epoch": 0.04, - "grad_norm": 2.9172960774366037, - "learning_rate": 9.995316179920258e-06, - "loss": 0.729, + "epoch": 0.03, + "grad_norm": 1.784788182359966, + "learning_rate": 9.763593380614658e-06, + "loss": 0.5542, "step": 413 }, { - "epoch": 0.04, - "grad_norm": 2.5099680383958822, - "learning_rate": 9.99524214032311e-06, - "loss": 0.7368, + "epoch": 0.03, + "grad_norm": 2.4377206466890113, + "learning_rate": 9.787234042553192e-06, + "loss": 0.663, "step": 414 }, { - "epoch": 0.04, - "grad_norm": 2.516008292563799, - "learning_rate": 9.99516752039317e-06, - "loss": 0.8693, + "epoch": 0.03, + "grad_norm": 2.050555734404037, + "learning_rate": 9.810874704491726e-06, + "loss": 0.6063, "step": 415 }, { - "epoch": 0.04, - "grad_norm": 2.2755006768965584, - "learning_rate": 9.995092320139106e-06, - "loss": 0.7947, + "epoch": 0.03, + "grad_norm": 1.9668111735131868, + "learning_rate": 9.834515366430261e-06, + "loss": 0.5514, "step": 416 }, { - "epoch": 0.04, - "grad_norm": 2.276675967191438, - "learning_rate": 9.995016539569656e-06, - "loss": 0.8244, + "epoch": 0.03, + "grad_norm": 1.859227133343033, + "learning_rate": 9.858156028368795e-06, + "loss": 0.6234, "step": 417 }, { - "epoch": 0.04, - "grad_norm": 2.4161022069453173, - "learning_rate": 9.994940178693624e-06, - "loss": 0.8016, + "epoch": 0.03, + "grad_norm": 4.700581455890908, + "learning_rate": 9.881796690307329e-06, + "loss": 0.5857, "step": 418 }, { - "epoch": 0.04, - "grad_norm": 3.8339702744181325, - "learning_rate": 9.99486323751988e-06, - "loss": 0.7548, + "epoch": 0.03, + "grad_norm": 1.9895807476281837, + "learning_rate": 9.905437352245864e-06, + "loss": 0.6442, "step": 419 }, { - "epoch": 0.04, - "grad_norm": 4.975612430718834, - "learning_rate": 9.994785716057364e-06, - "loss": 0.7163, + "epoch": 0.03, + "grad_norm": 2.2895671109023112, + "learning_rate": 9.929078014184398e-06, + "loss": 0.6353, "step": 420 }, { - "epoch": 0.04, - "grad_norm": 2.5503239423291677, - "learning_rate": 9.994707614315084e-06, - "loss": 0.7693, + "epoch": 0.03, + "grad_norm": 1.6777334805932096, + "learning_rate": 9.952718676122932e-06, + "loss": 0.523, "step": 421 }, { - "epoch": 0.04, - "grad_norm": 2.8040417675794083, - "learning_rate": 9.99462893230211e-06, - "loss": 0.7943, + "epoch": 0.03, + "grad_norm": 2.054313557069353, + "learning_rate": 9.976359338061467e-06, + "loss": 0.6265, "step": 422 }, { - "epoch": 0.04, - "grad_norm": 3.6155920470952188, - "learning_rate": 9.994549670027584e-06, - "loss": 0.8104, + "epoch": 0.03, + "grad_norm": 1.0850554042760896, + "learning_rate": 1e-05, + "loss": 0.446, "step": 423 }, { - "epoch": 0.04, - "grad_norm": 3.6688952806199437, - "learning_rate": 9.994469827500716e-06, - "loss": 0.7914, + "epoch": 0.03, + "grad_norm": 2.1875223299760185, + "learning_rate": 9.99999986794153e-06, + "loss": 0.6062, "step": 424 }, { - "epoch": 0.04, - "grad_norm": 2.784372060150453, - "learning_rate": 9.99438940473078e-06, - "loss": 0.7975, + "epoch": 0.03, + "grad_norm": 2.0855143446816133, + "learning_rate": 9.999999471766123e-06, + "loss": 0.5843, "step": 425 }, { - "epoch": 0.04, - "grad_norm": 2.5929689621810783, - "learning_rate": 9.994308401727122e-06, - "loss": 0.8029, + "epoch": 0.03, + "grad_norm": 22.755803049184415, + "learning_rate": 9.999998811473801e-06, + "loss": 0.6147, "step": 426 }, { - "epoch": 0.04, - "grad_norm": 2.827200496684302, - "learning_rate": 9.99422681849915e-06, - "loss": 0.8063, + "epoch": 0.03, + "grad_norm": 1.7034978733491404, + "learning_rate": 9.999997887064601e-06, + "loss": 0.5475, "step": 427 }, { - "epoch": 0.05, - "grad_norm": 2.420731477742907, - "learning_rate": 9.994144655056343e-06, - "loss": 0.7089, + "epoch": 0.03, + "grad_norm": 2.2335003786275074, + "learning_rate": 9.99999669853857e-06, + "loss": 0.5942, "step": 428 }, { - "epoch": 0.05, - "grad_norm": 3.1411562499918078, - "learning_rate": 9.994061911408245e-06, - "loss": 0.8219, + "epoch": 0.03, + "grad_norm": 2.5050152320176826, + "learning_rate": 9.999995245895772e-06, + "loss": 0.6755, "step": 429 }, { - "epoch": 0.05, - "grad_norm": 5.296664953262252, - "learning_rate": 9.993978587564473e-06, - "loss": 0.748, + "epoch": 0.03, + "grad_norm": 2.123235795706473, + "learning_rate": 9.999993529136281e-06, + "loss": 0.686, "step": 430 }, { - "epoch": 0.05, - "grad_norm": 3.786778377368977, - "learning_rate": 9.993894683534704e-06, - "loss": 0.79, + "epoch": 0.03, + "grad_norm": 1.0549905978769132, + "learning_rate": 9.999991548260191e-06, + "loss": 0.4642, "step": 431 }, { - "epoch": 0.05, - "grad_norm": 4.9069867693590075, - "learning_rate": 9.993810199328687e-06, - "loss": 0.7826, + "epoch": 0.03, + "grad_norm": 2.028104503190489, + "learning_rate": 9.999989303267605e-06, + "loss": 0.5842, "step": 432 }, { - "epoch": 0.05, - "grad_norm": 3.7740885681631293, - "learning_rate": 9.993725134956235e-06, - "loss": 0.8472, + "epoch": 0.03, + "grad_norm": 1.9980137198882097, + "learning_rate": 9.999986794158641e-06, + "loss": 0.5883, "step": 433 }, { - "epoch": 0.05, - "grad_norm": 3.0792423971075777, - "learning_rate": 9.993639490427235e-06, - "loss": 0.7272, + "epoch": 0.03, + "grad_norm": 4.95261947084521, + "learning_rate": 9.99998402093343e-06, + "loss": 0.6478, "step": 434 }, { - "epoch": 0.05, - "grad_norm": 2.505248944163071, - "learning_rate": 9.993553265751632e-06, - "loss": 0.8125, + "epoch": 0.03, + "grad_norm": 0.873147957115486, + "learning_rate": 9.999980983592125e-06, + "loss": 0.4274, "step": 435 }, { - "epoch": 0.05, - "grad_norm": 3.1705332082489175, - "learning_rate": 9.993466460939447e-06, - "loss": 0.7515, + "epoch": 0.03, + "grad_norm": 1.962941880908112, + "learning_rate": 9.99997768213488e-06, + "loss": 0.6601, "step": 436 }, { - "epoch": 0.05, - "grad_norm": 3.068737573080172, - "learning_rate": 9.993379076000762e-06, - "loss": 0.6471, + "epoch": 0.03, + "grad_norm": 2.117172694912394, + "learning_rate": 9.999974116561872e-06, + "loss": 0.6759, "step": 437 }, { - "epoch": 0.05, - "grad_norm": 3.0565141356797696, - "learning_rate": 9.99329111094573e-06, - "loss": 0.7869, + "epoch": 0.03, + "grad_norm": 2.070729751050917, + "learning_rate": 9.999970286873288e-06, + "loss": 0.6857, "step": 438 }, { - "epoch": 0.05, - "grad_norm": 2.418500158534013, - "learning_rate": 9.993202565784573e-06, - "loss": 0.8211, + "epoch": 0.03, + "grad_norm": 1.792328691351642, + "learning_rate": 9.999966193069332e-06, + "loss": 0.6117, "step": 439 }, { - "epoch": 0.05, - "grad_norm": 2.423639948102629, - "learning_rate": 9.993113440527573e-06, - "loss": 0.815, + "epoch": 0.03, + "grad_norm": 1.6286698831847481, + "learning_rate": 9.999961835150221e-06, + "loss": 0.5911, "step": 440 }, { - "epoch": 0.05, - "grad_norm": 2.2704412010849007, - "learning_rate": 9.993023735185088e-06, - "loss": 0.7843, + "epoch": 0.03, + "grad_norm": 2.290532725641558, + "learning_rate": 9.999957213116183e-06, + "loss": 0.587, "step": 441 }, { - "epoch": 0.05, - "grad_norm": 2.521200294230519, - "learning_rate": 9.992933449767538e-06, - "loss": 0.7483, + "epoch": 0.03, + "grad_norm": 2.0712507395701807, + "learning_rate": 9.999952326967462e-06, + "loss": 0.6172, "step": 442 }, { - "epoch": 0.05, - "grad_norm": 2.706970229721735, - "learning_rate": 9.992842584285416e-06, - "loss": 0.7187, + "epoch": 0.03, + "grad_norm": 1.814219437690518, + "learning_rate": 9.999947176704316e-06, + "loss": 0.6208, "step": 443 }, { - "epoch": 0.05, - "grad_norm": 2.390640870602009, - "learning_rate": 9.992751138749273e-06, - "loss": 0.7721, + "epoch": 0.03, + "grad_norm": 1.8663775511642962, + "learning_rate": 9.99994176232702e-06, + "loss": 0.6249, "step": 444 }, { - "epoch": 0.05, - "grad_norm": 2.5982385192863124, - "learning_rate": 9.992659113169736e-06, - "loss": 0.7153, + "epoch": 0.03, + "grad_norm": 2.7514297033447748, + "learning_rate": 9.999936083835856e-06, + "loss": 0.6375, "step": 445 }, { - "epoch": 0.05, - "grad_norm": 2.4856303794193093, - "learning_rate": 9.992566507557495e-06, - "loss": 0.7573, + "epoch": 0.03, + "grad_norm": 2.146583255742809, + "learning_rate": 9.999930141231127e-06, + "loss": 0.6106, "step": 446 }, { - "epoch": 0.05, - "grad_norm": 2.5444768288105166, - "learning_rate": 9.99247332192331e-06, - "loss": 0.7668, + "epoch": 0.03, + "grad_norm": 3.2936140009088253, + "learning_rate": 9.999923934513146e-06, + "loss": 0.5486, "step": 447 }, { - "epoch": 0.05, - "grad_norm": 2.040260161584014, - "learning_rate": 9.992379556278006e-06, - "loss": 0.7644, + "epoch": 0.03, + "grad_norm": 2.3405766697992267, + "learning_rate": 9.999917463682241e-06, + "loss": 0.6964, "step": 448 }, { - "epoch": 0.05, - "grad_norm": 2.3952337813826174, - "learning_rate": 9.992285210632476e-06, - "loss": 0.7813, + "epoch": 0.03, + "grad_norm": 2.4663997834665987, + "learning_rate": 9.999910728738753e-06, + "loss": 0.6502, "step": 449 }, { - "epoch": 0.05, - "grad_norm": 2.7875393710927003, - "learning_rate": 9.992190284997683e-06, - "loss": 0.8625, + "epoch": 0.03, + "grad_norm": 2.307899006947246, + "learning_rate": 9.999903729683038e-06, + "loss": 0.6407, "step": 450 }, { - "epoch": 0.05, - "grad_norm": 2.3456211144367436, - "learning_rate": 9.992094779384651e-06, - "loss": 0.7421, + "epoch": 0.03, + "grad_norm": 2.2261264949341855, + "learning_rate": 9.999896466515466e-06, + "loss": 0.6041, "step": 451 }, { - "epoch": 0.05, - "grad_norm": 2.2941981241460914, - "learning_rate": 9.991998693804482e-06, - "loss": 0.786, + "epoch": 0.03, + "grad_norm": 3.4670024909888713, + "learning_rate": 9.999888939236422e-06, + "loss": 0.5708, "step": 452 }, { - "epoch": 0.05, - "grad_norm": 2.7710253391308113, - "learning_rate": 9.991902028268333e-06, - "loss": 0.8221, + "epoch": 0.03, + "grad_norm": 1.9916134940828316, + "learning_rate": 9.999881147846301e-06, + "loss": 0.6283, "step": 453 }, { - "epoch": 0.05, - "grad_norm": 2.8127605742355093, - "learning_rate": 9.991804782787435e-06, - "loss": 0.8029, + "epoch": 0.03, + "grad_norm": 1.8559951887730242, + "learning_rate": 9.999873092345516e-06, + "loss": 0.5826, "step": 454 }, { - "epoch": 0.05, - "grad_norm": 2.21579763303864, - "learning_rate": 9.991706957373088e-06, - "loss": 0.8484, + "epoch": 0.03, + "grad_norm": 1.9715093683395095, + "learning_rate": 9.999864772734494e-06, + "loss": 0.5921, "step": 455 }, { - "epoch": 0.05, - "grad_norm": 3.0567607175438702, - "learning_rate": 9.991608552036659e-06, - "loss": 0.7895, + "epoch": 0.03, + "grad_norm": 2.5524785687961073, + "learning_rate": 9.999856189013671e-06, + "loss": 0.6845, "step": 456 }, { - "epoch": 0.05, - "grad_norm": 2.337491818683799, - "learning_rate": 9.991509566789575e-06, - "loss": 0.8293, + "epoch": 0.03, + "grad_norm": 1.5153120227856305, + "learning_rate": 9.999847341183501e-06, + "loss": 0.4623, "step": 457 }, { - "epoch": 0.05, - "grad_norm": 2.0862821534316573, - "learning_rate": 9.991410001643338e-06, - "loss": 0.7856, + "epoch": 0.03, + "grad_norm": 1.2564559672472113, + "learning_rate": 9.999838229244455e-06, + "loss": 0.4419, "step": 458 }, { - "epoch": 0.05, - "grad_norm": 2.278358994242391, - "learning_rate": 9.991309856609517e-06, - "loss": 0.7536, + "epoch": 0.03, + "grad_norm": 1.8994015342533994, + "learning_rate": 9.999828853197013e-06, + "loss": 0.6337, "step": 459 }, { - "epoch": 0.05, - "grad_norm": 1.9648242121722526, - "learning_rate": 9.991209131699745e-06, - "loss": 0.7508, + "epoch": 0.03, + "grad_norm": 1.8336871676104447, + "learning_rate": 9.999819213041665e-06, + "loss": 0.5976, "step": 460 }, { - "epoch": 0.05, - "grad_norm": 2.454545521761428, - "learning_rate": 9.991107826925724e-06, - "loss": 0.7682, + "epoch": 0.03, + "grad_norm": 2.0508705096936644, + "learning_rate": 9.999809308778929e-06, + "loss": 0.6666, "step": 461 }, { - "epoch": 0.05, - "grad_norm": 2.3135550329147647, - "learning_rate": 9.991005942299224e-06, - "loss": 0.794, + "epoch": 0.03, + "grad_norm": 2.332915698788453, + "learning_rate": 9.99979914040932e-06, + "loss": 0.6594, "step": 462 }, { - "epoch": 0.05, - "grad_norm": 2.958497853786959, - "learning_rate": 9.990903477832081e-06, - "loss": 0.884, + "epoch": 0.03, + "grad_norm": 2.285609998559129, + "learning_rate": 9.999788707933382e-06, + "loss": 0.6287, "step": 463 }, { - "epoch": 0.05, - "grad_norm": 2.559497699524832, - "learning_rate": 9.990800433536198e-06, - "loss": 0.7475, + "epoch": 0.03, + "grad_norm": 2.2941943555694406, + "learning_rate": 9.999778011351661e-06, + "loss": 0.5948, "step": 464 }, { - "epoch": 0.05, - "grad_norm": 2.2979423649720405, - "learning_rate": 9.990696809423551e-06, - "loss": 0.8139, + "epoch": 0.03, + "grad_norm": 1.5501557637749934, + "learning_rate": 9.999767050664725e-06, + "loss": 0.5672, "step": 465 }, { - "epoch": 0.05, - "grad_norm": 3.477081620098315, - "learning_rate": 9.990592605506172e-06, - "loss": 0.7413, + "epoch": 0.03, + "grad_norm": 2.568267448780335, + "learning_rate": 9.99975582587315e-06, + "loss": 0.621, "step": 466 }, { - "epoch": 0.05, - "grad_norm": 4.145253092459398, - "learning_rate": 9.990487821796171e-06, - "loss": 0.816, + "epoch": 0.03, + "grad_norm": 2.272348231857212, + "learning_rate": 9.999744336977532e-06, + "loss": 0.662, "step": 467 }, { - "epoch": 0.05, - "grad_norm": 2.6671842619211605, - "learning_rate": 9.99038245830572e-06, - "loss": 0.7634, + "epoch": 0.03, + "grad_norm": 2.1245652183532777, + "learning_rate": 9.999732583978476e-06, + "loss": 0.6502, "step": 468 }, { - "epoch": 0.05, - "grad_norm": 2.4700914843679307, - "learning_rate": 9.990276515047063e-06, - "loss": 0.7116, + "epoch": 0.03, + "grad_norm": 2.092213679057817, + "learning_rate": 9.999720566876605e-06, + "loss": 0.6341, "step": 469 }, { - "epoch": 0.05, - "grad_norm": 2.2107804543409255, - "learning_rate": 9.990169992032506e-06, - "loss": 0.8021, + "epoch": 0.03, + "grad_norm": 1.8142001875679095, + "learning_rate": 9.999708285672551e-06, + "loss": 0.511, "step": 470 }, { - "epoch": 0.05, - "grad_norm": 2.1867548251004902, - "learning_rate": 9.990062889274423e-06, - "loss": 0.8221, + "epoch": 0.03, + "grad_norm": 2.4631822265911083, + "learning_rate": 9.999695740366966e-06, + "loss": 0.6368, "step": 471 }, { - "epoch": 0.05, - "grad_norm": 3.1546753580491793, - "learning_rate": 9.989955206785258e-06, - "loss": 0.8515, + "epoch": 0.03, + "grad_norm": 2.2359457577411708, + "learning_rate": 9.999682930960508e-06, + "loss": 0.6226, "step": 472 }, { - "epoch": 0.05, - "grad_norm": 2.845940673194977, - "learning_rate": 9.989846944577524e-06, - "loss": 0.7823, + "epoch": 0.03, + "grad_norm": 2.3042026477374247, + "learning_rate": 9.999669857453857e-06, + "loss": 0.6059, "step": 473 }, { - "epoch": 0.05, - "grad_norm": 2.782847174447964, - "learning_rate": 9.989738102663792e-06, - "loss": 0.8043, + "epoch": 0.03, + "grad_norm": 1.9471741908991034, + "learning_rate": 9.999656519847704e-06, + "loss": 0.6653, "step": 474 }, { - "epoch": 0.05, - "grad_norm": 1.5304700280693413, - "learning_rate": 9.989628681056716e-06, - "loss": 0.6264, + "epoch": 0.03, + "grad_norm": 1.9441546705902637, + "learning_rate": 9.99964291814275e-06, + "loss": 0.7125, "step": 475 }, { - "epoch": 0.05, - "grad_norm": 2.4549669242826684, - "learning_rate": 9.989518679769e-06, - "loss": 0.8646, + "epoch": 0.03, + "grad_norm": 2.0954695452559515, + "learning_rate": 9.999629052339719e-06, + "loss": 0.5726, "step": 476 }, { - "epoch": 0.05, - "grad_norm": 2.704340279572477, - "learning_rate": 9.989408098813429e-06, - "loss": 0.8055, + "epoch": 0.03, + "grad_norm": 1.766923570965179, + "learning_rate": 9.99961492243934e-06, + "loss": 0.5918, "step": 477 }, { - "epoch": 0.05, - "grad_norm": 2.5055698906785566, - "learning_rate": 9.989296938202846e-06, - "loss": 0.7233, + "epoch": 0.03, + "grad_norm": 1.925135348327929, + "learning_rate": 9.99960052844236e-06, + "loss": 0.5814, "step": 478 }, { - "epoch": 0.05, - "grad_norm": 3.2218415084935477, - "learning_rate": 9.989185197950168e-06, - "loss": 0.8227, + "epoch": 0.03, + "grad_norm": 2.0965818632458926, + "learning_rate": 9.999585870349537e-06, + "loss": 0.6658, "step": 479 }, { - "epoch": 0.05, - "grad_norm": 2.3617802926725933, - "learning_rate": 9.989072878068376e-06, - "loss": 0.8324, + "epoch": 0.03, + "grad_norm": 2.196105671896359, + "learning_rate": 9.999570948161649e-06, + "loss": 0.6154, "step": 480 }, { - "epoch": 0.05, - "grad_norm": 2.4468315450678713, - "learning_rate": 9.98895997857052e-06, - "loss": 0.7699, + "epoch": 0.03, + "grad_norm": 1.913309831435858, + "learning_rate": 9.999555761879482e-06, + "loss": 0.5941, "step": 481 }, { - "epoch": 0.05, - "grad_norm": 2.667742814508917, - "learning_rate": 9.988846499469714e-06, - "loss": 0.9051, + "epoch": 0.03, + "grad_norm": 2.460971651752271, + "learning_rate": 9.99954031150384e-06, + "loss": 0.6147, "step": 482 }, { - "epoch": 0.05, - "grad_norm": 2.7213442322657078, - "learning_rate": 9.988732440779145e-06, - "loss": 0.7052, + "epoch": 0.03, + "grad_norm": 3.0391409946655625, + "learning_rate": 9.999524597035535e-06, + "loss": 0.6049, "step": 483 }, { - "epoch": 0.05, - "grad_norm": 2.326845379917123, - "learning_rate": 9.98861780251206e-06, - "loss": 0.7763, + "epoch": 0.03, + "grad_norm": 2.331122372519916, + "learning_rate": 9.999508618475403e-06, + "loss": 0.6802, "step": 484 }, { - "epoch": 0.05, - "grad_norm": 2.9136376815635368, - "learning_rate": 9.98850258468178e-06, - "loss": 0.7103, + "epoch": 0.03, + "grad_norm": 1.8306106286035506, + "learning_rate": 9.999492375824285e-06, + "loss": 0.5982, "step": 485 }, { - "epoch": 0.05, - "grad_norm": 2.2455870076795366, - "learning_rate": 9.988386787301689e-06, - "loss": 0.8498, + "epoch": 0.03, + "grad_norm": 2.3546967095804905, + "learning_rate": 9.999475869083037e-06, + "loss": 0.6507, "step": 486 }, { - "epoch": 0.05, - "grad_norm": 2.697658986196603, - "learning_rate": 9.988270410385242e-06, - "loss": 0.7247, + "epoch": 0.03, + "grad_norm": 1.9401689794455603, + "learning_rate": 9.999459098252535e-06, + "loss": 0.6034, "step": 487 }, { - "epoch": 0.05, - "grad_norm": 2.6037250054010124, - "learning_rate": 9.98815345394596e-06, - "loss": 0.7998, + "epoch": 0.03, + "grad_norm": 1.8680707621496733, + "learning_rate": 9.999442063333663e-06, + "loss": 0.6466, "step": 488 }, { - "epoch": 0.05, - "grad_norm": 2.2732857324114515, - "learning_rate": 9.988035917997426e-06, - "loss": 0.7466, + "epoch": 0.03, + "grad_norm": 1.6710636156417706, + "learning_rate": 9.999424764327319e-06, + "loss": 0.5802, "step": 489 }, { - "epoch": 0.05, - "grad_norm": 2.447532792950365, - "learning_rate": 9.9879178025533e-06, - "loss": 0.7684, + "epoch": 0.03, + "grad_norm": 1.962656405335296, + "learning_rate": 9.999407201234422e-06, + "loss": 0.576, "step": 490 }, { - "epoch": 0.05, - "grad_norm": 2.4996822315622005, - "learning_rate": 9.987799107627301e-06, - "loss": 0.7892, + "epoch": 0.03, + "grad_norm": 1.8608754414026427, + "learning_rate": 9.999389374055892e-06, + "loss": 0.6263, "step": 491 }, { - "epoch": 0.05, - "grad_norm": 2.4392244915064354, - "learning_rate": 9.987679833233219e-06, - "loss": 0.7154, + "epoch": 0.03, + "grad_norm": 1.7038428042080167, + "learning_rate": 9.999371282792678e-06, + "loss": 0.6258, "step": 492 }, { - "epoch": 0.05, - "grad_norm": 3.01944378814734, - "learning_rate": 9.987559979384913e-06, - "loss": 0.8231, + "epoch": 0.03, + "grad_norm": 2.7299346634445216, + "learning_rate": 9.999352927445732e-06, + "loss": 0.6221, "step": 493 }, { - "epoch": 0.05, - "grad_norm": 2.3479953891689944, - "learning_rate": 9.987439546096309e-06, - "loss": 0.7696, + "epoch": 0.04, + "grad_norm": 2.2005390078664777, + "learning_rate": 9.999334308016024e-06, + "loss": 0.638, "step": 494 }, { - "epoch": 0.05, - "grad_norm": 2.095707445377812, - "learning_rate": 9.987318533381391e-06, - "loss": 0.7402, + "epoch": 0.04, + "grad_norm": 1.6855551445316017, + "learning_rate": 9.999315424504536e-06, + "loss": 0.555, "step": 495 }, { - "epoch": 0.05, - "grad_norm": 3.6125099373264056, - "learning_rate": 9.987196941254226e-06, - "loss": 0.8434, + "epoch": 0.04, + "grad_norm": 2.3654195102561513, + "learning_rate": 9.99929627691227e-06, + "loss": 0.6586, "step": 496 }, { - "epoch": 0.05, - "grad_norm": 2.407463112602799, - "learning_rate": 9.987074769728936e-06, - "loss": 0.7014, + "epoch": 0.04, + "grad_norm": 1.889557322858207, + "learning_rate": 9.999276865240234e-06, + "loss": 0.6243, "step": 497 }, { - "epoch": 0.05, - "grad_norm": 2.2803041227883676, - "learning_rate": 9.986952018819715e-06, - "loss": 0.7015, + "epoch": 0.04, + "grad_norm": 2.039578181457567, + "learning_rate": 9.999257189489454e-06, + "loss": 0.6757, "step": 498 }, { - "epoch": 0.05, - "grad_norm": 2.850670147969541, - "learning_rate": 9.986828688540825e-06, - "loss": 0.8059, + "epoch": 0.04, + "grad_norm": 1.6921587885375207, + "learning_rate": 9.99923724966097e-06, + "loss": 0.6146, "step": 499 }, { - "epoch": 0.05, - "grad_norm": 2.5894833396875727, - "learning_rate": 9.986704778906594e-06, - "loss": 0.8364, + "epoch": 0.04, + "grad_norm": 1.9713363629517324, + "learning_rate": 9.999217045755833e-06, + "loss": 0.6533, "step": 500 }, { - "epoch": 0.05, - "grad_norm": 2.310359350175941, - "learning_rate": 9.986580289931416e-06, - "loss": 0.854, + "epoch": 0.04, + "grad_norm": 1.920753634184261, + "learning_rate": 9.999196577775114e-06, + "loss": 0.5849, "step": 501 }, { - "epoch": 0.05, - "grad_norm": 1.3368374036518098, - "learning_rate": 9.986455221629754e-06, - "loss": 0.5978, + "epoch": 0.04, + "grad_norm": 1.7173101022104877, + "learning_rate": 9.999175845719891e-06, + "loss": 0.6427, "step": 502 }, { - "epoch": 0.05, - "grad_norm": 2.4931722034378425, - "learning_rate": 9.98632957401614e-06, - "loss": 0.8525, + "epoch": 0.04, + "grad_norm": 0.9243342876661618, + "learning_rate": 9.999154849591261e-06, + "loss": 0.4675, "step": 503 }, { - "epoch": 0.05, - "grad_norm": 4.644787385474121, - "learning_rate": 9.986203347105168e-06, - "loss": 0.8037, + "epoch": 0.04, + "grad_norm": 1.8912624197014798, + "learning_rate": 9.999133589390332e-06, + "loss": 0.6546, "step": 504 }, { - "epoch": 0.05, - "grad_norm": 2.563442064696163, - "learning_rate": 9.986076540911507e-06, - "loss": 0.8657, + "epoch": 0.04, + "grad_norm": 2.267540234103927, + "learning_rate": 9.999112065118228e-06, + "loss": 0.6377, "step": 505 }, { - "epoch": 0.05, - "grad_norm": 2.1165485781851836, - "learning_rate": 9.985949155449885e-06, - "loss": 0.7919, + "epoch": 0.04, + "grad_norm": 2.290877658942183, + "learning_rate": 9.999090276776084e-06, + "loss": 0.5846, "step": 506 }, { - "epoch": 0.05, - "grad_norm": 3.471404486523451, - "learning_rate": 9.985821190735104e-06, - "loss": 0.7542, + "epoch": 0.04, + "grad_norm": 2.09920988300228, + "learning_rate": 9.999068224365053e-06, + "loss": 0.5603, "step": 507 }, { - "epoch": 0.05, - "grad_norm": 2.3249602966837206, - "learning_rate": 9.98569264678203e-06, - "loss": 0.8402, + "epoch": 0.04, + "grad_norm": 1.7944665830226367, + "learning_rate": 9.9990459078863e-06, + "loss": 0.5484, "step": 508 }, { - "epoch": 0.05, - "grad_norm": 2.48495778605686, - "learning_rate": 9.985563523605597e-06, - "loss": 0.721, + "epoch": 0.04, + "grad_norm": 3.2535082187929927, + "learning_rate": 9.999023327341002e-06, + "loss": 0.6804, "step": 509 }, { - "epoch": 0.05, - "grad_norm": 2.4276988470343577, - "learning_rate": 9.985433821220805e-06, - "loss": 0.8305, + "epoch": 0.04, + "grad_norm": 1.6155536720601074, + "learning_rate": 9.999000482730353e-06, + "loss": 0.5381, "step": 510 }, { - "epoch": 0.05, - "grad_norm": 2.266343385980247, - "learning_rate": 9.985303539642721e-06, - "loss": 0.8618, + "epoch": 0.04, + "grad_norm": 1.8248015303487206, + "learning_rate": 9.998977374055561e-06, + "loss": 0.5809, "step": 511 }, { - "epoch": 0.05, - "grad_norm": 2.6368944571388737, - "learning_rate": 9.985172678886486e-06, - "loss": 0.7497, + "epoch": 0.04, + "grad_norm": 1.9393530333731168, + "learning_rate": 9.998954001317844e-06, + "loss": 0.5644, "step": 512 }, { - "epoch": 0.05, - "grad_norm": 2.3362271184394703, - "learning_rate": 9.985041238967297e-06, - "loss": 0.7608, + "epoch": 0.04, + "grad_norm": 1.8207521752287739, + "learning_rate": 9.998930364518437e-06, + "loss": 0.6344, "step": 513 }, { - "epoch": 0.05, - "grad_norm": 2.323494738235158, - "learning_rate": 9.984909219900429e-06, - "loss": 0.8141, + "epoch": 0.04, + "grad_norm": 1.7068423797397476, + "learning_rate": 9.998906463658591e-06, + "loss": 0.6049, "step": 514 }, { - "epoch": 0.05, - "grad_norm": 2.2944704925260733, - "learning_rate": 9.984776621701218e-06, - "loss": 0.7668, + "epoch": 0.04, + "grad_norm": 1.714576663821938, + "learning_rate": 9.998882298739567e-06, + "loss": 0.6531, "step": 515 }, { - "epoch": 0.05, - "grad_norm": 2.539104026728401, - "learning_rate": 9.984643444385067e-06, - "loss": 0.6898, + "epoch": 0.04, + "grad_norm": 1.5934447260068765, + "learning_rate": 9.99885786976264e-06, + "loss": 0.4833, "step": 516 }, { - "epoch": 0.05, - "grad_norm": 2.2795736059424563, - "learning_rate": 9.984509687967451e-06, - "loss": 0.8352, + "epoch": 0.04, + "grad_norm": 2.303727161297358, + "learning_rate": 9.998833176729103e-06, + "loss": 0.6388, "step": 517 }, { - "epoch": 0.05, - "grad_norm": 2.5063326762752127, - "learning_rate": 9.984375352463908e-06, - "loss": 0.7219, + "epoch": 0.04, + "grad_norm": 0.9904779351141787, + "learning_rate": 9.99880821964026e-06, + "loss": 0.4476, "step": 518 }, { - "epoch": 0.05, - "grad_norm": 2.875219661862659, - "learning_rate": 9.984240437890045e-06, - "loss": 0.7054, + "epoch": 0.04, + "grad_norm": 1.7985322725256871, + "learning_rate": 9.998782998497428e-06, + "loss": 0.6203, "step": 519 }, { - "epoch": 0.05, - "grad_norm": 2.8779919468667536, - "learning_rate": 9.984104944261536e-06, - "loss": 0.8438, + "epoch": 0.04, + "grad_norm": 2.4369476050354653, + "learning_rate": 9.99875751330194e-06, + "loss": 0.6091, "step": 520 }, { - "epoch": 0.05, - "grad_norm": 2.222566644720778, - "learning_rate": 9.983968871594121e-06, - "loss": 0.7997, + "epoch": 0.04, + "grad_norm": 1.9025855553674964, + "learning_rate": 9.998731764055141e-06, + "loss": 0.6083, "step": 521 }, { - "epoch": 0.05, - "grad_norm": 2.959982902693521, - "learning_rate": 9.98383221990361e-06, - "loss": 0.7926, + "epoch": 0.04, + "grad_norm": 1.9441539227336635, + "learning_rate": 9.998705750758391e-06, + "loss": 0.6258, "step": 522 }, { - "epoch": 0.06, - "grad_norm": 3.1528886906910403, - "learning_rate": 9.983694989205882e-06, - "loss": 0.8242, + "epoch": 0.04, + "grad_norm": 1.834135829686564, + "learning_rate": 9.99867947341307e-06, + "loss": 0.5987, "step": 523 }, { - "epoch": 0.06, - "grad_norm": 2.18977821015223, - "learning_rate": 9.983557179516872e-06, - "loss": 0.757, + "epoch": 0.04, + "grad_norm": 2.16897703565224, + "learning_rate": 9.998652932020555e-06, + "loss": 0.5999, "step": 524 }, { - "epoch": 0.06, - "grad_norm": 1.5734084664110641, - "learning_rate": 9.983418790852597e-06, - "loss": 0.6629, + "epoch": 0.04, + "grad_norm": 1.8138945986792385, + "learning_rate": 9.99862612658226e-06, + "loss": 0.609, "step": 525 }, { - "epoch": 0.06, - "grad_norm": 2.324375311018105, - "learning_rate": 9.983279823229132e-06, - "loss": 0.8114, + "epoch": 0.04, + "grad_norm": 1.877092314177485, + "learning_rate": 9.998599057099592e-06, + "loss": 0.5896, "step": 526 }, { - "epoch": 0.06, - "grad_norm": 3.124013190729634, - "learning_rate": 9.983140276662621e-06, - "loss": 0.779, + "epoch": 0.04, + "grad_norm": 0.9385450241747622, + "learning_rate": 9.998571723573987e-06, + "loss": 0.4504, "step": 527 }, { - "epoch": 0.06, - "grad_norm": 2.535001843346559, - "learning_rate": 9.98300015116928e-06, - "loss": 0.7235, + "epoch": 0.04, + "grad_norm": 1.8552989982645332, + "learning_rate": 9.998544126006884e-06, + "loss": 0.6041, "step": 528 }, { - "epoch": 0.06, - "grad_norm": 2.1443674722870116, - "learning_rate": 9.982859446765385e-06, - "loss": 0.8205, + "epoch": 0.04, + "grad_norm": 2.4542337740888542, + "learning_rate": 9.998516264399742e-06, + "loss": 0.6519, "step": 529 }, { - "epoch": 0.06, - "grad_norm": 3.3022865226413534, - "learning_rate": 9.982718163467282e-06, - "loss": 0.7919, + "epoch": 0.04, + "grad_norm": 1.8807341500407655, + "learning_rate": 9.998488138754036e-06, + "loss": 0.5871, "step": 530 }, { - "epoch": 0.06, - "grad_norm": 2.2210575024061376, - "learning_rate": 9.982576301291387e-06, - "loss": 0.8224, + "epoch": 0.04, + "grad_norm": 2.33476139679285, + "learning_rate": 9.998459749071248e-06, + "loss": 0.6807, "step": 531 }, { - "epoch": 0.06, - "grad_norm": 2.266820994801646, - "learning_rate": 9.982433860254181e-06, - "loss": 0.8027, + "epoch": 0.04, + "grad_norm": 0.9618780216788828, + "learning_rate": 9.998431095352878e-06, + "loss": 0.4318, "step": 532 }, { - "epoch": 0.06, - "grad_norm": 2.8366928780887593, - "learning_rate": 9.982290840372212e-06, - "loss": 0.7632, + "epoch": 0.04, + "grad_norm": 1.8645312740961977, + "learning_rate": 9.998402177600443e-06, + "loss": 0.6163, "step": 533 }, { - "epoch": 0.06, - "grad_norm": 2.7429115320708926, - "learning_rate": 9.982147241662097e-06, - "loss": 0.8406, + "epoch": 0.04, + "grad_norm": 1.9635932627143786, + "learning_rate": 9.998372995815466e-06, + "loss": 0.6528, "step": 534 }, { - "epoch": 0.06, - "grad_norm": 2.324430888530594, - "learning_rate": 9.982003064140515e-06, - "loss": 0.7881, + "epoch": 0.04, + "grad_norm": 1.7344585206969054, + "learning_rate": 9.998343549999492e-06, + "loss": 0.5932, "step": 535 }, { - "epoch": 0.06, - "grad_norm": 2.519378774537875, - "learning_rate": 9.98185830782422e-06, - "loss": 0.7492, + "epoch": 0.04, + "grad_norm": 1.9915528616791083, + "learning_rate": 9.998313840154075e-06, + "loss": 0.6351, "step": 536 }, { - "epoch": 0.06, - "grad_norm": 2.2247325308571977, - "learning_rate": 9.981712972730027e-06, - "loss": 0.7321, + "epoch": 0.04, + "grad_norm": 1.9454575437608645, + "learning_rate": 9.998283866280784e-06, + "loss": 0.6073, "step": 537 }, { - "epoch": 0.06, - "grad_norm": 2.367419912672573, - "learning_rate": 9.981567058874822e-06, - "loss": 0.7559, + "epoch": 0.04, + "grad_norm": 2.6847741458596865, + "learning_rate": 9.998253628381202e-06, + "loss": 0.598, "step": 538 }, { - "epoch": 0.06, - "grad_norm": 2.442752211560955, - "learning_rate": 9.981420566275554e-06, - "loss": 0.7823, + "epoch": 0.04, + "grad_norm": 1.908037612932845, + "learning_rate": 9.998223126456928e-06, + "loss": 0.6649, "step": 539 }, { - "epoch": 0.06, - "grad_norm": 2.718090756388608, - "learning_rate": 9.981273494949247e-06, - "loss": 0.7689, + "epoch": 0.04, + "grad_norm": 2.5781733858935634, + "learning_rate": 9.99819236050957e-06, + "loss": 0.6133, "step": 540 }, { - "epoch": 0.06, - "grad_norm": 1.3168535978993352, - "learning_rate": 9.981125844912985e-06, - "loss": 0.6097, + "epoch": 0.04, + "grad_norm": 2.9789407731346227, + "learning_rate": 9.998161330540759e-06, + "loss": 0.5987, "step": 541 }, { - "epoch": 0.06, - "grad_norm": 2.39423124719434, - "learning_rate": 9.98097761618392e-06, - "loss": 0.8062, + "epoch": 0.04, + "grad_norm": 1.6956323618645095, + "learning_rate": 9.998130036552127e-06, + "loss": 0.5806, "step": 542 }, { - "epoch": 0.06, - "grad_norm": 2.6461362569919227, - "learning_rate": 9.980828808779275e-06, - "loss": 0.8499, + "epoch": 0.04, + "grad_norm": 1.746859060529278, + "learning_rate": 9.998098478545332e-06, + "loss": 0.5729, "step": 543 }, { - "epoch": 0.06, - "grad_norm": 2.6944205009407094, - "learning_rate": 9.980679422716336e-06, - "loss": 0.8563, + "epoch": 0.04, + "grad_norm": 2.0316384293136003, + "learning_rate": 9.99806665652204e-06, + "loss": 0.6008, "step": 544 }, { - "epoch": 0.06, - "grad_norm": 2.419123667806579, - "learning_rate": 9.98052945801246e-06, - "loss": 0.8623, + "epoch": 0.04, + "grad_norm": 1.6889384247265664, + "learning_rate": 9.99803457048393e-06, + "loss": 0.5743, "step": 545 }, { - "epoch": 0.06, - "grad_norm": 2.7435396717539557, - "learning_rate": 9.980378914685069e-06, - "loss": 0.7444, + "epoch": 0.04, + "grad_norm": 1.1922584396776097, + "learning_rate": 9.9980022204327e-06, + "loss": 0.4776, "step": 546 }, { - "epoch": 0.06, - "grad_norm": 2.662949276618667, - "learning_rate": 9.980227792751653e-06, - "loss": 0.7362, + "epoch": 0.04, + "grad_norm": 1.9205258478117864, + "learning_rate": 9.997969606370057e-06, + "loss": 0.6789, "step": 547 }, { - "epoch": 0.06, - "grad_norm": 2.9210580658144902, - "learning_rate": 9.980076092229767e-06, - "loss": 0.7805, + "epoch": 0.04, + "grad_norm": 2.074542947396767, + "learning_rate": 9.997936728297722e-06, + "loss": 0.6179, "step": 548 }, { - "epoch": 0.06, - "grad_norm": 2.6271159104021105, - "learning_rate": 9.979923813137039e-06, - "loss": 0.7591, + "epoch": 0.04, + "grad_norm": 2.1640790632383022, + "learning_rate": 9.997903586217435e-06, + "loss": 0.6156, "step": 549 }, { - "epoch": 0.06, - "grad_norm": 2.1688198714122695, - "learning_rate": 9.979770955491154e-06, - "loss": 0.7107, + "epoch": 0.04, + "grad_norm": 2.7814762695734596, + "learning_rate": 9.997870180130946e-06, + "loss": 0.5686, "step": 550 }, { - "epoch": 0.06, - "grad_norm": 3.3806001298954946, - "learning_rate": 9.979617519309878e-06, - "loss": 0.7611, + "epoch": 0.04, + "grad_norm": 1.9071650776770406, + "learning_rate": 9.997836510040018e-06, + "loss": 0.636, "step": 551 }, { - "epoch": 0.06, - "grad_norm": 2.6186345183581548, - "learning_rate": 9.97946350461103e-06, - "loss": 0.8052, + "epoch": 0.04, + "grad_norm": 2.1029054969458296, + "learning_rate": 9.997802575946432e-06, + "loss": 0.6782, "step": 552 }, { - "epoch": 0.06, - "grad_norm": 2.434402348311883, - "learning_rate": 9.979308911412508e-06, - "loss": 0.7546, + "epoch": 0.04, + "grad_norm": 1.894825801614073, + "learning_rate": 9.997768377851977e-06, + "loss": 0.6462, "step": 553 }, { - "epoch": 0.06, - "grad_norm": 2.1361701202500973, - "learning_rate": 9.979153739732273e-06, - "loss": 0.6987, + "epoch": 0.04, + "grad_norm": 2.2889123593330307, + "learning_rate": 9.997733915758462e-06, + "loss": 0.6173, "step": 554 }, { - "epoch": 0.06, - "grad_norm": 2.638578499216826, - "learning_rate": 9.978997989588346e-06, - "loss": 0.8184, + "epoch": 0.04, + "grad_norm": 1.9254057223291228, + "learning_rate": 9.997699189667707e-06, + "loss": 0.6613, "step": 555 }, { - "epoch": 0.06, - "grad_norm": 2.397169992403867, - "learning_rate": 9.978841660998827e-06, - "loss": 0.748, + "epoch": 0.04, + "grad_norm": 1.8511083113169775, + "learning_rate": 9.997664199581548e-06, + "loss": 0.6074, "step": 556 }, { - "epoch": 0.06, - "grad_norm": 2.6871052832438886, - "learning_rate": 9.978684753981875e-06, - "loss": 0.8245, + "epoch": 0.04, + "grad_norm": 1.7482663993777725, + "learning_rate": 9.997628945501829e-06, + "loss": 0.5637, "step": 557 }, { - "epoch": 0.06, - "grad_norm": 2.127649979618681, - "learning_rate": 9.978527268555723e-06, - "loss": 0.7135, + "epoch": 0.04, + "grad_norm": 2.010341424619799, + "learning_rate": 9.997593427430416e-06, + "loss": 0.5821, "step": 558 }, { - "epoch": 0.06, - "grad_norm": 2.500931002963333, - "learning_rate": 9.97836920473866e-06, - "loss": 0.7869, + "epoch": 0.04, + "grad_norm": 2.1018379578742588, + "learning_rate": 9.997557645369185e-06, + "loss": 0.5893, "step": 559 }, { - "epoch": 0.06, - "grad_norm": 2.6065448044913357, - "learning_rate": 9.978210562549057e-06, - "loss": 0.7589, + "epoch": 0.04, + "grad_norm": 2.1395267964991374, + "learning_rate": 9.997521599320023e-06, + "loss": 0.6137, "step": 560 }, { - "epoch": 0.06, - "grad_norm": 2.2848105655651274, - "learning_rate": 9.978051342005342e-06, - "loss": 0.7439, + "epoch": 0.04, + "grad_norm": 1.984533208557336, + "learning_rate": 9.997485289284838e-06, + "loss": 0.5361, "step": 561 }, { - "epoch": 0.06, - "grad_norm": 2.3300511317144, - "learning_rate": 9.97789154312601e-06, - "loss": 0.731, + "epoch": 0.04, + "grad_norm": 1.7359252405780785, + "learning_rate": 9.997448715265546e-06, + "loss": 0.6127, "step": 562 }, { - "epoch": 0.06, - "grad_norm": 2.53982707617505, - "learning_rate": 9.97773116592963e-06, - "loss": 0.7874, + "epoch": 0.04, + "grad_norm": 2.03761059267238, + "learning_rate": 9.997411877264079e-06, + "loss": 0.6547, "step": 563 }, { - "epoch": 0.06, - "grad_norm": 3.0472971560396367, - "learning_rate": 9.977570210434831e-06, - "loss": 0.7261, + "epoch": 0.04, + "grad_norm": 1.8115353981017317, + "learning_rate": 9.997374775282383e-06, + "loss": 0.5728, "step": 564 }, { - "epoch": 0.06, - "grad_norm": 2.883803485999733, - "learning_rate": 9.977408676660314e-06, - "loss": 0.8079, + "epoch": 0.04, + "grad_norm": 2.085702852631562, + "learning_rate": 9.997337409322418e-06, + "loss": 0.5901, "step": 565 }, { - "epoch": 0.06, - "grad_norm": 19.573290818215536, - "learning_rate": 9.977246564624845e-06, - "loss": 0.7604, + "epoch": 0.04, + "grad_norm": 3.27602039888933, + "learning_rate": 9.997299779386157e-06, + "loss": 0.5453, "step": 566 }, { - "epoch": 0.06, - "grad_norm": 2.6716091098325245, - "learning_rate": 9.977083874347258e-06, - "loss": 0.6414, + "epoch": 0.04, + "grad_norm": 2.3524361430090353, + "learning_rate": 9.99726188547559e-06, + "loss": 0.5785, "step": 567 }, { - "epoch": 0.06, - "grad_norm": 3.6901090885051846, - "learning_rate": 9.976920605846452e-06, - "loss": 0.8105, + "epoch": 0.04, + "grad_norm": 1.8299057271813801, + "learning_rate": 9.997223727592717e-06, + "loss": 0.5621, "step": 568 }, { - "epoch": 0.06, - "grad_norm": 2.7134792536923555, - "learning_rate": 9.976756759141399e-06, - "loss": 0.7635, + "epoch": 0.04, + "grad_norm": 1.8186636486616559, + "learning_rate": 9.997185305739551e-06, + "loss": 0.5667, "step": 569 }, { - "epoch": 0.06, - "grad_norm": 2.7086458704405434, - "learning_rate": 9.976592334251132e-06, - "loss": 0.8302, + "epoch": 0.04, + "grad_norm": 2.04478643481762, + "learning_rate": 9.997146619918127e-06, + "loss": 0.5872, "step": 570 }, { - "epoch": 0.06, - "grad_norm": 2.1306874134793508, - "learning_rate": 9.976427331194753e-06, - "loss": 0.7362, + "epoch": 0.04, + "grad_norm": 1.9500864604801027, + "learning_rate": 9.997107670130486e-06, + "loss": 0.5196, "step": 571 }, { - "epoch": 0.06, - "grad_norm": 2.45895557799812, - "learning_rate": 9.976261749991433e-06, - "loss": 0.7854, + "epoch": 0.04, + "grad_norm": 1.9522090784217032, + "learning_rate": 9.997068456378684e-06, + "loss": 0.6241, "step": 572 }, { - "epoch": 0.06, - "grad_norm": 2.2266882437527475, - "learning_rate": 9.976095590660407e-06, - "loss": 0.8152, + "epoch": 0.04, + "grad_norm": 1.7383190161017843, + "learning_rate": 9.997028978664796e-06, + "loss": 0.6261, "step": 573 }, { - "epoch": 0.06, - "grad_norm": 2.363404791350861, - "learning_rate": 9.975928853220979e-06, - "loss": 0.8157, + "epoch": 0.04, + "grad_norm": 1.9759852476669626, + "learning_rate": 9.996989236990904e-06, + "loss": 0.5949, "step": 574 }, { - "epoch": 0.06, - "grad_norm": 2.4298149385499475, - "learning_rate": 9.975761537692522e-06, - "loss": 0.7796, + "epoch": 0.04, + "grad_norm": 2.0283355607637072, + "learning_rate": 9.996949231359108e-06, + "loss": 0.6765, "step": 575 }, { - "epoch": 0.06, - "grad_norm": 2.2830133914169424, - "learning_rate": 9.975593644094472e-06, - "loss": 0.7881, + "epoch": 0.04, + "grad_norm": 1.7967996396341426, + "learning_rate": 9.996908961771521e-06, + "loss": 0.5594, "step": 576 }, { - "epoch": 0.06, - "grad_norm": 5.164741876077585, - "learning_rate": 9.975425172446336e-06, - "loss": 0.6478, + "epoch": 0.04, + "grad_norm": 2.0374957318020206, + "learning_rate": 9.996868428230271e-06, + "loss": 0.5022, "step": 577 }, { - "epoch": 0.06, - "grad_norm": 2.1615284561481283, - "learning_rate": 9.975256122767687e-06, - "loss": 0.7038, + "epoch": 0.04, + "grad_norm": 1.718033533035505, + "learning_rate": 9.9968276307375e-06, + "loss": 0.5735, "step": 578 }, { - "epoch": 0.06, - "grad_norm": 2.8554861521662316, - "learning_rate": 9.975086495078161e-06, - "loss": 0.8082, + "epoch": 0.04, + "grad_norm": 1.8645114793545352, + "learning_rate": 9.99678656929536e-06, + "loss": 0.5832, "step": 579 }, { - "epoch": 0.06, - "grad_norm": 2.8793525267467426, - "learning_rate": 9.974916289397469e-06, - "loss": 0.7612, + "epoch": 0.04, + "grad_norm": 2.234258206261488, + "learning_rate": 9.996745243906026e-06, + "loss": 0.6641, "step": 580 }, { - "epoch": 0.06, - "grad_norm": 2.2817512397632775, - "learning_rate": 9.974745505745385e-06, - "loss": 0.7507, + "epoch": 0.04, + "grad_norm": 1.7111107571978625, + "learning_rate": 9.996703654571673e-06, + "loss": 0.5942, "step": 581 }, { - "epoch": 0.06, - "grad_norm": 2.3293064382183752, - "learning_rate": 9.974574144141746e-06, - "loss": 0.782, + "epoch": 0.04, + "grad_norm": 1.8906290161834944, + "learning_rate": 9.996661801294503e-06, + "loss": 0.597, "step": 582 }, { - "epoch": 0.06, - "grad_norm": 2.420863828032633, - "learning_rate": 9.974402204606464e-06, - "loss": 0.7904, + "epoch": 0.04, + "grad_norm": 1.0415238412573753, + "learning_rate": 9.996619684076724e-06, + "loss": 0.4621, "step": 583 }, { - "epoch": 0.06, - "grad_norm": 2.467148177657104, - "learning_rate": 9.974229687159515e-06, - "loss": 0.7148, + "epoch": 0.04, + "grad_norm": 1.6829251385574737, + "learning_rate": 9.996577302920567e-06, + "loss": 0.5932, "step": 584 }, { - "epoch": 0.06, - "grad_norm": 2.3692761510748506, - "learning_rate": 9.974056591820937e-06, - "loss": 0.7572, + "epoch": 0.04, + "grad_norm": 2.247617574473002, + "learning_rate": 9.996534657828263e-06, + "loss": 0.6249, "step": 585 }, { - "epoch": 0.06, - "grad_norm": 2.410599414340472, - "learning_rate": 9.973882918610845e-06, - "loss": 0.7463, + "epoch": 0.04, + "grad_norm": 2.153577055386234, + "learning_rate": 9.996491748802067e-06, + "loss": 0.6987, "step": 586 }, { - "epoch": 0.06, - "grad_norm": 2.407706605193204, - "learning_rate": 9.973708667549413e-06, - "loss": 0.7719, + "epoch": 0.04, + "grad_norm": 0.9914816671035794, + "learning_rate": 9.996448575844247e-06, + "loss": 0.4795, "step": 587 }, { - "epoch": 0.06, - "grad_norm": 2.1487892620296885, - "learning_rate": 9.973533838656886e-06, - "loss": 0.737, + "epoch": 0.04, + "grad_norm": 1.8840199759775895, + "learning_rate": 9.996405138957083e-06, + "loss": 0.6045, "step": 588 }, { - "epoch": 0.06, - "grad_norm": 2.983088881371681, - "learning_rate": 9.973358431953574e-06, - "loss": 0.8082, + "epoch": 0.04, + "grad_norm": 1.8634910862938125, + "learning_rate": 9.996361438142872e-06, + "loss": 0.5621, "step": 589 }, { - "epoch": 0.06, - "grad_norm": 4.84274621933108, - "learning_rate": 9.973182447459856e-06, - "loss": 0.8005, + "epoch": 0.04, + "grad_norm": 1.8643085486343365, + "learning_rate": 9.996317473403917e-06, + "loss": 0.6012, "step": 590 }, { - "epoch": 0.06, - "grad_norm": 2.514268336749957, - "learning_rate": 9.973005885196177e-06, - "loss": 0.8384, + "epoch": 0.04, + "grad_norm": 1.8303324155612233, + "learning_rate": 9.996273244742544e-06, + "loss": 0.6613, "step": 591 }, { - "epoch": 0.06, - "grad_norm": 2.9786767920760076, - "learning_rate": 9.97282874518305e-06, - "loss": 0.8312, + "epoch": 0.04, + "grad_norm": 2.1295288089674824, + "learning_rate": 9.996228752161087e-06, + "loss": 0.6774, "step": 592 }, { - "epoch": 0.06, - "grad_norm": 2.5407053182524333, - "learning_rate": 9.972651027441053e-06, - "loss": 0.8417, + "epoch": 0.04, + "grad_norm": 1.1452371408227051, + "learning_rate": 9.996183995661901e-06, + "loss": 0.4749, "step": 593 }, { - "epoch": 0.06, - "grad_norm": 2.2034669895851526, - "learning_rate": 9.972472731990836e-06, - "loss": 0.7066, + "epoch": 0.04, + "grad_norm": 1.6636188169689397, + "learning_rate": 9.996138975247345e-06, + "loss": 0.5534, "step": 594 }, { - "epoch": 0.06, - "grad_norm": 2.3153560832834725, - "learning_rate": 9.972293858853111e-06, - "loss": 0.7032, + "epoch": 0.04, + "grad_norm": 2.0729021118309223, + "learning_rate": 9.9960936909198e-06, + "loss": 0.6076, "step": 595 }, { - "epoch": 0.06, - "grad_norm": 1.1674873013648661, - "learning_rate": 9.972114408048658e-06, - "loss": 0.6294, + "epoch": 0.04, + "grad_norm": 2.0614673781884516, + "learning_rate": 9.996048142681657e-06, + "loss": 0.5615, "step": 596 }, { - "epoch": 0.06, - "grad_norm": 2.4983825264182977, - "learning_rate": 9.971934379598327e-06, - "loss": 0.8138, + "epoch": 0.04, + "grad_norm": 2.0029918819076733, + "learning_rate": 9.99600233053532e-06, + "loss": 0.6462, "step": 597 }, { - "epoch": 0.06, - "grad_norm": 2.5640167266728517, - "learning_rate": 9.971753773523032e-06, - "loss": 0.7516, + "epoch": 0.04, + "grad_norm": 2.609659529329357, + "learning_rate": 9.995956254483214e-06, + "loss": 0.6315, "step": 598 }, { - "epoch": 0.06, - "grad_norm": 2.819664317552869, - "learning_rate": 9.971572589843754e-06, - "loss": 0.7164, + "epoch": 0.04, + "grad_norm": 1.9087203761711684, + "learning_rate": 9.995909914527768e-06, + "loss": 0.644, "step": 599 }, { - "epoch": 0.06, - "grad_norm": 2.8700762251626, - "learning_rate": 9.971390828581546e-06, - "loss": 0.7128, + "epoch": 0.04, + "grad_norm": 1.8829500333621283, + "learning_rate": 9.995863310671432e-06, + "loss": 0.6312, "step": 600 }, { - "epoch": 0.06, - "grad_norm": 3.2350947513381, - "learning_rate": 9.971208489757522e-06, - "loss": 0.7065, + "epoch": 0.04, + "grad_norm": 1.893944875278428, + "learning_rate": 9.99581644291667e-06, + "loss": 0.5784, "step": 601 }, { - "epoch": 0.06, - "grad_norm": 6.78868545134538, - "learning_rate": 9.971025573392863e-06, - "loss": 0.7491, + "epoch": 0.04, + "grad_norm": 2.9048323204193474, + "learning_rate": 9.995769311265953e-06, + "loss": 0.652, "step": 602 }, { - "epoch": 0.06, - "grad_norm": 2.620861295267675, - "learning_rate": 9.970842079508827e-06, - "loss": 0.8286, + "epoch": 0.04, + "grad_norm": 2.2606312619527498, + "learning_rate": 9.995721915721774e-06, + "loss": 0.6448, "step": 603 }, { - "epoch": 0.06, - "grad_norm": 2.387001074929076, - "learning_rate": 9.970658008126725e-06, - "loss": 0.7537, + "epoch": 0.04, + "grad_norm": 1.9306316318555141, + "learning_rate": 9.995674256286636e-06, + "loss": 0.6581, "step": 604 }, { - "epoch": 0.06, - "grad_norm": 2.356795460296981, - "learning_rate": 9.970473359267945e-06, - "loss": 0.7542, + "epoch": 0.04, + "grad_norm": 2.3962532730879595, + "learning_rate": 9.995626332963053e-06, + "loss": 0.6084, "step": 605 }, { - "epoch": 0.06, - "grad_norm": 1.9886458699286347, - "learning_rate": 9.970288132953938e-06, - "loss": 0.6917, + "epoch": 0.04, + "grad_norm": 2.2562368539082964, + "learning_rate": 9.995578145753563e-06, + "loss": 0.5854, "step": 606 }, { - "epoch": 0.06, - "grad_norm": 2.2751385664982706, - "learning_rate": 9.970102329206221e-06, - "loss": 0.7728, + "epoch": 0.04, + "grad_norm": 1.9210581967569014, + "learning_rate": 9.995529694660707e-06, + "loss": 0.6846, "step": 607 }, { - "epoch": 0.06, - "grad_norm": 2.495226626436587, - "learning_rate": 9.969915948046387e-06, - "loss": 0.6747, + "epoch": 0.04, + "grad_norm": 1.647766560806375, + "learning_rate": 9.995480979687046e-06, + "loss": 0.6169, "step": 608 }, { - "epoch": 0.06, - "grad_norm": 1.4123037303465609, - "learning_rate": 9.969728989496081e-06, - "loss": 0.6243, + "epoch": 0.04, + "grad_norm": 2.13837041386148, + "learning_rate": 9.995432000835153e-06, + "loss": 0.5085, "step": 609 }, { - "epoch": 0.06, - "grad_norm": 2.510919227428744, - "learning_rate": 9.96954145357703e-06, - "loss": 0.7466, + "epoch": 0.04, + "grad_norm": 1.9906969154995522, + "learning_rate": 9.995382758107612e-06, + "loss": 0.6579, "step": 610 }, { - "epoch": 0.06, - "grad_norm": 2.4441972726765404, - "learning_rate": 9.969353340311017e-06, - "loss": 0.7598, + "epoch": 0.04, + "grad_norm": 2.0689845979235857, + "learning_rate": 9.995333251507029e-06, + "loss": 0.6563, "step": 611 }, { - "epoch": 0.06, - "grad_norm": 2.0740206980367337, - "learning_rate": 9.969164649719898e-06, - "loss": 0.7293, + "epoch": 0.04, + "grad_norm": 2.8069274650962255, + "learning_rate": 9.995283481036017e-06, + "loss": 0.6015, "step": 612 }, { - "epoch": 0.06, - "grad_norm": 1.3874311549388647, - "learning_rate": 9.968975381825594e-06, - "loss": 0.6497, + "epoch": 0.04, + "grad_norm": 2.4476288037748337, + "learning_rate": 9.995233446697206e-06, + "loss": 0.6352, "step": 613 }, { - "epoch": 0.06, - "grad_norm": 2.2328817638501284, - "learning_rate": 9.968785536650095e-06, - "loss": 0.7843, + "epoch": 0.04, + "grad_norm": 1.8319136511654073, + "learning_rate": 9.995183148493236e-06, + "loss": 0.5968, "step": 614 }, { - "epoch": 0.06, - "grad_norm": 2.039098314636674, - "learning_rate": 9.968595114215453e-06, - "loss": 0.7148, + "epoch": 0.04, + "grad_norm": 1.8200796281200393, + "learning_rate": 9.99513258642677e-06, + "loss": 0.6012, "step": 615 }, { - "epoch": 0.06, - "grad_norm": 1.8974999023720256, - "learning_rate": 9.968404114543796e-06, - "loss": 0.7553, + "epoch": 0.04, + "grad_norm": 1.882547938777712, + "learning_rate": 9.995081760500471e-06, + "loss": 0.6052, "step": 616 }, { - "epoch": 0.06, - "grad_norm": 2.363258301216911, - "learning_rate": 9.968212537657311e-06, - "loss": 0.7465, + "epoch": 0.04, + "grad_norm": 2.1057028045970614, + "learning_rate": 9.99503067071703e-06, + "loss": 0.6635, "step": 617 }, { - "epoch": 0.07, - "grad_norm": 2.5506185481947714, - "learning_rate": 9.968020383578253e-06, - "loss": 0.8889, + "epoch": 0.04, + "grad_norm": 1.9073253252672766, + "learning_rate": 9.994979317079143e-06, + "loss": 0.6267, "step": 618 }, { - "epoch": 0.07, - "grad_norm": 2.0478689904138068, - "learning_rate": 9.96782765232895e-06, - "loss": 0.7014, + "epoch": 0.04, + "grad_norm": 0.934776660873475, + "learning_rate": 9.994927699589523e-06, + "loss": 0.4859, "step": 619 }, { - "epoch": 0.07, - "grad_norm": 2.1272161891964703, - "learning_rate": 9.967634343931791e-06, - "loss": 0.8825, + "epoch": 0.04, + "grad_norm": 2.166466625934562, + "learning_rate": 9.994875818250898e-06, + "loss": 0.4815, "step": 620 }, { - "epoch": 0.07, - "grad_norm": 2.3598809615068794, - "learning_rate": 9.967440458409232e-06, - "loss": 0.7521, + "epoch": 0.04, + "grad_norm": 1.6409978776045064, + "learning_rate": 9.994823673066006e-06, + "loss": 0.6166, "step": 621 }, { - "epoch": 0.07, - "grad_norm": 2.0759823166768627, - "learning_rate": 9.967245995783801e-06, - "loss": 0.7767, + "epoch": 0.04, + "grad_norm": 1.8921408716100994, + "learning_rate": 9.994771264037602e-06, + "loss": 0.6034, "step": 622 }, { - "epoch": 0.07, - "grad_norm": 2.1835848414184875, - "learning_rate": 9.96705095607809e-06, - "loss": 0.8266, + "epoch": 0.04, + "grad_norm": 1.839631362481778, + "learning_rate": 9.994718591168457e-06, + "loss": 0.5893, "step": 623 }, { - "epoch": 0.07, - "grad_norm": 3.6137036631937254, - "learning_rate": 9.966855339314756e-06, - "loss": 0.857, + "epoch": 0.04, + "grad_norm": 1.58648766790884, + "learning_rate": 9.99466565446135e-06, + "loss": 0.5901, "step": 624 }, { - "epoch": 0.07, - "grad_norm": 2.516218552454981, - "learning_rate": 9.966659145516527e-06, - "loss": 0.8295, + "epoch": 0.04, + "grad_norm": 2.016676054035594, + "learning_rate": 9.99461245391908e-06, + "loss": 0.5767, "step": 625 }, { - "epoch": 0.07, - "grad_norm": 2.179275872622473, - "learning_rate": 9.966462374706196e-06, - "loss": 0.8447, + "epoch": 0.04, + "grad_norm": 1.8959820514271726, + "learning_rate": 9.994558989544456e-06, + "loss": 0.5902, "step": 626 }, { - "epoch": 0.07, - "grad_norm": 2.0487084736863883, - "learning_rate": 9.966265026906622e-06, - "loss": 0.7527, + "epoch": 0.04, + "grad_norm": 1.6985999292016627, + "learning_rate": 9.9945052613403e-06, + "loss": 0.5715, "step": 627 }, { - "epoch": 0.07, - "grad_norm": 2.394315996422968, - "learning_rate": 9.966067102140734e-06, - "loss": 0.7543, + "epoch": 0.04, + "grad_norm": 2.58791575547424, + "learning_rate": 9.994451269309457e-06, + "loss": 0.5815, "step": 628 }, { - "epoch": 0.07, - "grad_norm": 3.0716112519627363, - "learning_rate": 9.965868600431525e-06, - "loss": 0.7141, + "epoch": 0.04, + "grad_norm": 2.3491442763324626, + "learning_rate": 9.99439701345477e-06, + "loss": 0.5783, "step": 629 }, { - "epoch": 0.07, - "grad_norm": 4.924340713473136, - "learning_rate": 9.965669521802057e-06, - "loss": 0.8291, + "epoch": 0.04, + "grad_norm": 0.9750236674257756, + "learning_rate": 9.994342493779112e-06, + "loss": 0.4597, "step": 630 }, { - "epoch": 0.07, - "grad_norm": 2.8596202937468727, - "learning_rate": 9.965469866275457e-06, - "loss": 0.7771, + "epoch": 0.04, + "grad_norm": 1.9419992209003525, + "learning_rate": 9.99428771028536e-06, + "loss": 0.6231, "step": 631 }, { - "epoch": 0.07, - "grad_norm": 2.7307465501368644, - "learning_rate": 9.965269633874924e-06, - "loss": 0.7157, - "step": 632 + "epoch": 0.04, + "grad_norm": 1.8150080040311811, + "learning_rate": 9.994232662976409e-06, + "loss": 0.5802, + "step": 632 }, { - "epoch": 0.07, - "grad_norm": 2.407738691497479, - "learning_rate": 9.965068824623718e-06, - "loss": 0.6772, + "epoch": 0.04, + "grad_norm": 2.201781156406854, + "learning_rate": 9.994177351855162e-06, + "loss": 0.676, "step": 633 }, { - "epoch": 0.07, - "grad_norm": 2.0971229549109176, - "learning_rate": 9.964867438545166e-06, - "loss": 0.7709, + "epoch": 0.04, + "grad_norm": 2.0996473010097976, + "learning_rate": 9.99412177692455e-06, + "loss": 0.548, "step": 634 }, { - "epoch": 0.07, - "grad_norm": 2.90404822468166, - "learning_rate": 9.964665475662668e-06, - "loss": 0.7457, + "epoch": 0.05, + "grad_norm": 2.025498163061105, + "learning_rate": 9.9940659381875e-06, + "loss": 0.5751, "step": 635 }, { - "epoch": 0.07, - "grad_norm": 2.213670720198414, - "learning_rate": 9.964462935999688e-06, - "loss": 0.8134, + "epoch": 0.05, + "grad_norm": 10.289524425441089, + "learning_rate": 9.994009835646967e-06, + "loss": 0.6241, "step": 636 }, { - "epoch": 0.07, - "grad_norm": 2.8026473036892554, - "learning_rate": 9.964259819579754e-06, - "loss": 0.7484, + "epoch": 0.05, + "grad_norm": 2.1201402461343104, + "learning_rate": 9.99395346930591e-06, + "loss": 0.5199, "step": 637 }, { - "epoch": 0.07, - "grad_norm": 2.4715032303759554, - "learning_rate": 9.964056126426464e-06, - "loss": 0.851, + "epoch": 0.05, + "grad_norm": 2.0171378139086538, + "learning_rate": 9.993896839167311e-06, + "loss": 0.6237, "step": 638 }, { - "epoch": 0.07, - "grad_norm": 1.9713105204657861, - "learning_rate": 9.963851856563483e-06, - "loss": 0.7706, + "epoch": 0.05, + "grad_norm": 2.2402563523612335, + "learning_rate": 9.993839945234158e-06, + "loss": 0.6086, "step": 639 }, { - "epoch": 0.07, - "grad_norm": 2.0696765042050407, - "learning_rate": 9.963647010014541e-06, - "loss": 0.7526, + "epoch": 0.05, + "grad_norm": 3.5640244424367955, + "learning_rate": 9.993782787509458e-06, + "loss": 0.5998, "step": 640 }, { - "epoch": 0.07, - "grad_norm": 2.61319517784048, - "learning_rate": 9.963441586803439e-06, - "loss": 0.7249, + "epoch": 0.05, + "grad_norm": 3.2672426170269406, + "learning_rate": 9.99372536599623e-06, + "loss": 0.5954, "step": 641 }, { - "epoch": 0.07, - "grad_norm": 35.735839142292704, - "learning_rate": 9.963235586954043e-06, - "loss": 0.807, + "epoch": 0.05, + "grad_norm": 1.8691792865076111, + "learning_rate": 9.993667680697508e-06, + "loss": 0.6211, "step": 642 }, { - "epoch": 0.07, - "grad_norm": 2.601165133281576, - "learning_rate": 9.963029010490281e-06, - "loss": 0.6441, + "epoch": 0.05, + "grad_norm": 2.4739355123404234, + "learning_rate": 9.993609731616338e-06, + "loss": 0.6191, "step": 643 }, { - "epoch": 0.07, - "grad_norm": 2.700590007062543, - "learning_rate": 9.962821857436156e-06, - "loss": 0.699, + "epoch": 0.05, + "grad_norm": 3.3412737850790286, + "learning_rate": 9.99355151875578e-06, + "loss": 0.6296, "step": 644 }, { - "epoch": 0.07, - "grad_norm": 2.83901661028331, - "learning_rate": 9.962614127815735e-06, - "loss": 0.715, + "epoch": 0.05, + "grad_norm": 1.0979368691498586, + "learning_rate": 9.99349304211891e-06, + "loss": 0.4555, "step": 645 }, { - "epoch": 0.07, - "grad_norm": 3.2052448381433063, - "learning_rate": 9.96240582165315e-06, - "loss": 0.7545, + "epoch": 0.05, + "grad_norm": 1.889663948757811, + "learning_rate": 9.99343430170882e-06, + "loss": 0.5793, "step": 646 }, { - "epoch": 0.07, - "grad_norm": 3.50569734388418, - "learning_rate": 9.962196938972599e-06, - "loss": 0.7477, + "epoch": 0.05, + "grad_norm": 2.1341357915242445, + "learning_rate": 9.99337529752861e-06, + "loss": 0.6119, "step": 647 }, { - "epoch": 0.07, - "grad_norm": 3.3913535336435077, - "learning_rate": 9.961987479798354e-06, - "loss": 0.6525, + "epoch": 0.05, + "grad_norm": 1.7857264782500004, + "learning_rate": 9.993316029581394e-06, + "loss": 0.6522, "step": 648 }, { - "epoch": 0.07, - "grad_norm": 3.97776453246874, - "learning_rate": 9.961777444154747e-06, - "loss": 0.7575, + "epoch": 0.05, + "grad_norm": 1.7820128825840982, + "learning_rate": 9.993256497870307e-06, + "loss": 0.5664, "step": 649 }, { - "epoch": 0.07, - "grad_norm": 2.6326354312277207, - "learning_rate": 9.96156683206618e-06, - "loss": 0.7891, + "epoch": 0.05, + "grad_norm": 1.8579943705886592, + "learning_rate": 9.993196702398494e-06, + "loss": 0.5894, "step": 650 }, { - "epoch": 0.07, - "grad_norm": 2.590711433166387, - "learning_rate": 9.96135564355712e-06, - "loss": 0.8471, + "epoch": 0.05, + "grad_norm": 2.096513359023596, + "learning_rate": 9.99313664316911e-06, + "loss": 0.5434, "step": 651 }, { - "epoch": 0.07, - "grad_norm": 2.694706096312992, - "learning_rate": 9.961143878652104e-06, - "loss": 0.7534, + "epoch": 0.05, + "grad_norm": 2.036287397415716, + "learning_rate": 9.993076320185329e-06, + "loss": 0.6504, "step": 652 }, { - "epoch": 0.07, - "grad_norm": 2.1493792498317736, - "learning_rate": 9.960931537375731e-06, - "loss": 0.7545, + "epoch": 0.05, + "grad_norm": 1.7364652479511444, + "learning_rate": 9.993015733450337e-06, + "loss": 0.5249, "step": 653 }, { - "epoch": 0.07, - "grad_norm": 2.547411269212289, - "learning_rate": 9.960718619752676e-06, - "loss": 0.7998, + "epoch": 0.05, + "grad_norm": 1.981323459670183, + "learning_rate": 9.992954882967339e-06, + "loss": 0.6371, "step": 654 }, { - "epoch": 0.07, - "grad_norm": 2.380301424223435, - "learning_rate": 9.96050512580767e-06, - "loss": 0.6799, + "epoch": 0.05, + "grad_norm": 3.9594070547795286, + "learning_rate": 9.992893768739542e-06, + "loss": 0.5844, "step": 655 }, { - "epoch": 0.07, - "grad_norm": 2.317215606575152, - "learning_rate": 9.960291055565518e-06, - "loss": 0.8361, + "epoch": 0.05, + "grad_norm": 1.9732400271073796, + "learning_rate": 9.99283239077018e-06, + "loss": 0.6039, "step": 656 }, { - "epoch": 0.07, - "grad_norm": 2.76463251057736, - "learning_rate": 9.96007640905109e-06, - "loss": 0.7474, + "epoch": 0.05, + "grad_norm": 2.054525898926972, + "learning_rate": 9.992770749062492e-06, + "loss": 0.5595, "step": 657 }, { - "epoch": 0.07, - "grad_norm": 2.337177345644392, - "learning_rate": 9.959861186289324e-06, - "loss": 0.7889, + "epoch": 0.05, + "grad_norm": 1.980638941845807, + "learning_rate": 9.992708843619737e-06, + "loss": 0.6492, "step": 658 }, { - "epoch": 0.07, - "grad_norm": 2.549090640868925, - "learning_rate": 9.95964538730522e-06, - "loss": 0.8025, + "epoch": 0.05, + "grad_norm": 2.0620395684410373, + "learning_rate": 9.992646674445182e-06, + "loss": 0.5939, "step": 659 }, { - "epoch": 0.07, - "grad_norm": 2.530522238161703, - "learning_rate": 9.959429012123853e-06, - "loss": 0.715, + "epoch": 0.05, + "grad_norm": 2.867374980847205, + "learning_rate": 9.992584241542112e-06, + "loss": 0.6485, "step": 660 }, { - "epoch": 0.07, - "grad_norm": 2.515390648368771, - "learning_rate": 9.95921206077036e-06, - "loss": 0.7851, + "epoch": 0.05, + "grad_norm": 1.9988354811696056, + "learning_rate": 9.992521544913827e-06, + "loss": 0.5973, "step": 661 }, { - "epoch": 0.07, - "grad_norm": 2.929755415997428, - "learning_rate": 9.958994533269947e-06, - "loss": 0.7982, + "epoch": 0.05, + "grad_norm": 1.658786690420108, + "learning_rate": 9.992458584563635e-06, + "loss": 0.5714, "step": 662 }, { - "epoch": 0.07, - "grad_norm": 2.450266556660888, - "learning_rate": 9.958776429647882e-06, - "loss": 0.6835, + "epoch": 0.05, + "grad_norm": 1.6651325182711714, + "learning_rate": 9.992395360494865e-06, + "loss": 0.5298, "step": 663 }, { - "epoch": 0.07, - "grad_norm": 2.5343938278654954, - "learning_rate": 9.958557749929507e-06, - "loss": 0.7832, + "epoch": 0.05, + "grad_norm": 1.4979047030329098, + "learning_rate": 9.992331872710855e-06, + "loss": 0.5016, "step": 664 }, { - "epoch": 0.07, - "grad_norm": 2.2961561118476643, - "learning_rate": 9.958338494140226e-06, - "loss": 0.7488, + "epoch": 0.05, + "grad_norm": 1.96395762051117, + "learning_rate": 9.992268121214958e-06, + "loss": 0.5802, "step": 665 }, { - "epoch": 0.07, - "grad_norm": 2.0185829134658593, - "learning_rate": 9.958118662305512e-06, - "loss": 0.6639, + "epoch": 0.05, + "grad_norm": 2.535952452393718, + "learning_rate": 9.992204106010544e-06, + "loss": 0.6566, "step": 666 }, { - "epoch": 0.07, - "grad_norm": 2.6097834515400935, - "learning_rate": 9.957898254450904e-06, - "loss": 0.8354, + "epoch": 0.05, + "grad_norm": 1.9773069147119862, + "learning_rate": 9.992139827100994e-06, + "loss": 0.6176, "step": 667 }, { - "epoch": 0.07, - "grad_norm": 2.681329545781412, - "learning_rate": 9.957677270602009e-06, - "loss": 0.8243, + "epoch": 0.05, + "grad_norm": 1.607972912448335, + "learning_rate": 9.9920752844897e-06, + "loss": 0.5853, "step": 668 }, { - "epoch": 0.07, - "grad_norm": 3.0534672454472056, - "learning_rate": 9.957455710784499e-06, - "loss": 0.7536, + "epoch": 0.05, + "grad_norm": 1.0613846436850072, + "learning_rate": 9.992010478180076e-06, + "loss": 0.4776, "step": 669 }, { - "epoch": 0.07, - "grad_norm": 2.306313918952685, - "learning_rate": 9.957233575024114e-06, - "loss": 0.831, + "epoch": 0.05, + "grad_norm": 1.8816910942919807, + "learning_rate": 9.991945408175543e-06, + "loss": 0.6354, "step": 670 }, { - "epoch": 0.07, - "grad_norm": 2.188405047316281, - "learning_rate": 9.957010863346665e-06, - "loss": 0.7803, + "epoch": 0.05, + "grad_norm": 1.6002999125250774, + "learning_rate": 9.991880074479538e-06, + "loss": 0.4938, "step": 671 }, { - "epoch": 0.07, - "grad_norm": 3.0129653737744073, - "learning_rate": 9.956787575778022e-06, - "loss": 0.721, + "epoch": 0.05, + "grad_norm": 1.7764153422047582, + "learning_rate": 9.991814477095514e-06, + "loss": 0.6328, "step": 672 }, { - "epoch": 0.07, - "grad_norm": 2.3823769975743683, - "learning_rate": 9.956563712344127e-06, - "loss": 0.7449, + "epoch": 0.05, + "grad_norm": 1.8279768864771238, + "learning_rate": 9.991748616026935e-06, + "loss": 0.5961, "step": 673 }, { - "epoch": 0.07, - "grad_norm": 2.5712155219456476, - "learning_rate": 9.956339273070988e-06, - "loss": 0.7693, + "epoch": 0.05, + "grad_norm": 1.1152115665653053, + "learning_rate": 9.991682491277277e-06, + "loss": 0.4844, "step": 674 }, { - "epoch": 0.07, - "grad_norm": 2.917447740270616, - "learning_rate": 9.95611425798468e-06, - "loss": 0.7564, + "epoch": 0.05, + "grad_norm": 2.3295346142732876, + "learning_rate": 9.991616102850039e-06, + "loss": 0.5781, "step": 675 }, { - "epoch": 0.07, - "grad_norm": 2.441012766615215, - "learning_rate": 9.955888667111341e-06, - "loss": 0.7311, + "epoch": 0.05, + "grad_norm": 1.7780347625214579, + "learning_rate": 9.991549450748722e-06, + "loss": 0.5797, "step": 676 }, { - "epoch": 0.07, - "grad_norm": 2.3439122547341844, - "learning_rate": 9.955662500477185e-06, - "loss": 0.6111, + "epoch": 0.05, + "grad_norm": 1.68734366488098, + "learning_rate": 9.991482534976851e-06, + "loss": 0.5634, "step": 677 }, { - "epoch": 0.07, - "grad_norm": 2.3472763771803153, - "learning_rate": 9.955435758108488e-06, - "loss": 0.7722, + "epoch": 0.05, + "grad_norm": 1.690177466130356, + "learning_rate": 9.991415355537956e-06, + "loss": 0.5521, "step": 678 }, { - "epoch": 0.07, - "grad_norm": 3.0118308796961055, - "learning_rate": 9.955208440031586e-06, - "loss": 0.7479, + "epoch": 0.05, + "grad_norm": 1.7402835701927133, + "learning_rate": 9.991347912435592e-06, + "loss": 0.6335, "step": 679 }, { - "epoch": 0.07, - "grad_norm": 2.5164721040624083, - "learning_rate": 9.954980546272892e-06, - "loss": 0.7528, + "epoch": 0.05, + "grad_norm": 2.293034203874179, + "learning_rate": 9.991280205673317e-06, + "loss": 0.6081, "step": 680 }, { - "epoch": 0.07, - "grad_norm": 2.349312862665128, - "learning_rate": 9.95475207685888e-06, - "loss": 0.7895, + "epoch": 0.05, + "grad_norm": 2.211166476269173, + "learning_rate": 9.991212235254708e-06, + "loss": 0.5786, "step": 681 }, { - "epoch": 0.07, - "grad_norm": 3.538231437183247, - "learning_rate": 9.954523031816096e-06, - "loss": 0.7881, + "epoch": 0.05, + "grad_norm": 1.7961350064102972, + "learning_rate": 9.99114400118336e-06, + "loss": 0.6046, "step": 682 }, { - "epoch": 0.07, - "grad_norm": 2.2748356407545756, - "learning_rate": 9.95429341117115e-06, - "loss": 0.7373, + "epoch": 0.05, + "grad_norm": 2.021768418850511, + "learning_rate": 9.991075503462868e-06, + "loss": 0.6187, "step": 683 }, { - "epoch": 0.07, - "grad_norm": 2.690864131773263, - "learning_rate": 9.954063214950715e-06, - "loss": 0.7729, + "epoch": 0.05, + "grad_norm": 2.136481016666777, + "learning_rate": 9.99100674209686e-06, + "loss": 0.6157, "step": 684 }, { - "epoch": 0.07, - "grad_norm": 2.441689564432529, - "learning_rate": 9.953832443181536e-06, - "loss": 0.6846, + "epoch": 0.05, + "grad_norm": 2.2774946357777157, + "learning_rate": 9.990937717088963e-06, + "loss": 0.649, "step": 685 }, { - "epoch": 0.07, - "grad_norm": 2.7063998770038253, - "learning_rate": 9.953601095890425e-06, - "loss": 0.7764, + "epoch": 0.05, + "grad_norm": 1.846335231775904, + "learning_rate": 9.990868428442824e-06, + "loss": 0.5918, "step": 686 }, { - "epoch": 0.07, - "grad_norm": 2.3897375113199755, - "learning_rate": 9.953369173104256e-06, - "loss": 0.7468, + "epoch": 0.05, + "grad_norm": 2.0884213031246848, + "learning_rate": 9.990798876162102e-06, + "loss": 0.5976, "step": 687 }, { - "epoch": 0.07, - "grad_norm": 5.623372216112194, - "learning_rate": 9.953136674849978e-06, - "loss": 0.7766, + "epoch": 0.05, + "grad_norm": 1.0405607952938931, + "learning_rate": 9.990729060250473e-06, + "loss": 0.4713, "step": 688 }, { - "epoch": 0.07, - "grad_norm": 2.33678410607246, - "learning_rate": 9.952903601154598e-06, - "loss": 0.7899, + "epoch": 0.05, + "grad_norm": 0.9680776501509842, + "learning_rate": 9.990658980711625e-06, + "loss": 0.4875, "step": 689 }, { - "epoch": 0.07, - "grad_norm": 5.7100874309999385, - "learning_rate": 9.952669952045196e-06, - "loss": 0.6976, + "epoch": 0.05, + "grad_norm": 1.721400562805146, + "learning_rate": 9.99058863754926e-06, + "loss": 0.5808, "step": 690 }, { - "epoch": 0.07, - "grad_norm": 2.4385704663154444, - "learning_rate": 9.952435727548915e-06, - "loss": 0.7834, + "epoch": 0.05, + "grad_norm": 1.862140322640572, + "learning_rate": 9.990518030767089e-06, + "loss": 0.6422, "step": 691 }, { - "epoch": 0.07, - "grad_norm": 2.466989152583984, - "learning_rate": 9.952200927692965e-06, - "loss": 0.7762, + "epoch": 0.05, + "grad_norm": 1.7741570272621363, + "learning_rate": 9.990447160368848e-06, + "loss": 0.6235, "step": 692 }, { - "epoch": 0.07, - "grad_norm": 2.4093135917267245, - "learning_rate": 9.95196555250463e-06, - "loss": 0.6769, + "epoch": 0.05, + "grad_norm": 1.8559871042191343, + "learning_rate": 9.990376026358277e-06, + "loss": 0.6259, "step": 693 }, { - "epoch": 0.07, - "grad_norm": 2.3338351134830537, - "learning_rate": 9.95172960201125e-06, - "loss": 0.7501, + "epoch": 0.05, + "grad_norm": 1.0405620118675403, + "learning_rate": 9.990304628739135e-06, + "loss": 0.4507, "step": 694 }, { - "epoch": 0.07, - "grad_norm": 2.3101279059282334, - "learning_rate": 9.95149307624024e-06, - "loss": 0.7034, + "epoch": 0.05, + "grad_norm": 1.9698840253662522, + "learning_rate": 9.990232967515194e-06, + "loss": 0.5742, "step": 695 }, { - "epoch": 0.07, - "grad_norm": 2.2681781457320707, - "learning_rate": 9.951255975219076e-06, - "loss": 0.8163, + "epoch": 0.05, + "grad_norm": 2.2168067833320473, + "learning_rate": 9.990161042690238e-06, + "loss": 0.5785, "step": 696 }, { - "epoch": 0.07, - "grad_norm": 2.79211360891463, - "learning_rate": 9.951018298975306e-06, - "loss": 0.7878, + "epoch": 0.05, + "grad_norm": 1.9608609007903917, + "learning_rate": 9.990088854268067e-06, + "loss": 0.5851, "step": 697 }, { - "epoch": 0.07, - "grad_norm": 1.9232352000611326, - "learning_rate": 9.950780047536543e-06, - "loss": 0.74, + "epoch": 0.05, + "grad_norm": 1.8542317176827354, + "learning_rate": 9.990016402252494e-06, + "loss": 0.5832, "step": 698 }, { - "epoch": 0.07, - "grad_norm": 2.750774856582151, - "learning_rate": 9.950541220930463e-06, - "loss": 0.7653, + "epoch": 0.05, + "grad_norm": 1.8118017063530996, + "learning_rate": 9.989943686647345e-06, + "loss": 0.5826, "step": 699 }, { - "epoch": 0.07, - "grad_norm": 2.046202452399702, - "learning_rate": 9.950301819184816e-06, - "loss": 0.6924, + "epoch": 0.05, + "grad_norm": 1.8955544786274217, + "learning_rate": 9.989870707456463e-06, + "loss": 0.6663, "step": 700 }, { - "epoch": 0.07, - "grad_norm": 2.5862040137998457, - "learning_rate": 9.950061842327415e-06, - "loss": 0.7198, + "epoch": 0.05, + "grad_norm": 1.9250891838900077, + "learning_rate": 9.989797464683702e-06, + "loss": 0.5863, "step": 701 }, { - "epoch": 0.07, - "grad_norm": 2.9170844730186487, - "learning_rate": 9.949821290386137e-06, - "loss": 0.7769, + "epoch": 0.05, + "grad_norm": 1.8581517726714682, + "learning_rate": 9.98972395833293e-06, + "loss": 0.5978, "step": 702 }, { - "epoch": 0.07, - "grad_norm": 2.638513502534882, - "learning_rate": 9.94958016338893e-06, - "loss": 0.7465, + "epoch": 0.05, + "grad_norm": 1.9725233008991974, + "learning_rate": 9.989650188408034e-06, + "loss": 0.6514, "step": 703 }, { - "epoch": 0.07, - "grad_norm": 3.154543573950718, - "learning_rate": 9.949338461363807e-06, - "loss": 0.7234, + "epoch": 0.05, + "grad_norm": 1.8821647649277196, + "learning_rate": 9.989576154912905e-06, + "loss": 0.6198, "step": 704 }, { - "epoch": 0.07, - "grad_norm": 2.4170777296764943, - "learning_rate": 9.949096184338849e-06, - "loss": 0.7579, + "epoch": 0.05, + "grad_norm": 2.0526053250844285, + "learning_rate": 9.989501857851458e-06, + "loss": 0.5926, "step": 705 }, { - "epoch": 0.07, - "grad_norm": 3.1649853433830772, - "learning_rate": 9.948853332342202e-06, - "loss": 0.6416, + "epoch": 0.05, + "grad_norm": 1.7825672812827749, + "learning_rate": 9.989427297227615e-06, + "loss": 0.6593, "step": 706 }, { - "epoch": 0.07, - "grad_norm": 2.2732580001069485, - "learning_rate": 9.948609905402082e-06, - "loss": 0.7448, + "epoch": 0.05, + "grad_norm": 2.021422961184575, + "learning_rate": 9.989352473045317e-06, + "loss": 0.5922, "step": 707 }, { - "epoch": 0.07, - "grad_norm": 3.0109624448260335, - "learning_rate": 9.948365903546766e-06, - "loss": 0.8066, + "epoch": 0.05, + "grad_norm": 2.620450632686968, + "learning_rate": 9.989277385308515e-06, + "loss": 0.6479, "step": 708 }, { - "epoch": 0.07, - "grad_norm": 3.5719103836155117, - "learning_rate": 9.948121326804604e-06, - "loss": 0.6297, + "epoch": 0.05, + "grad_norm": 1.7199596143725084, + "learning_rate": 9.989202034021175e-06, + "loss": 0.5499, "step": 709 }, { - "epoch": 0.07, - "grad_norm": 1.6740846319075147, - "learning_rate": 9.947876175204013e-06, - "loss": 0.6353, + "epoch": 0.05, + "grad_norm": 1.8673878608490122, + "learning_rate": 9.989126419187276e-06, + "loss": 0.5511, "step": 710 }, { - "epoch": 0.07, - "grad_norm": 2.8270316407346163, - "learning_rate": 9.947630448773468e-06, - "loss": 0.7751, + "epoch": 0.05, + "grad_norm": 2.3538825430962333, + "learning_rate": 9.989050540810817e-06, + "loss": 0.6498, "step": 711 }, { - "epoch": 0.07, - "grad_norm": 2.8283132421827895, - "learning_rate": 9.94738414754152e-06, - "loss": 0.8027, + "epoch": 0.05, + "grad_norm": 1.9771713273117246, + "learning_rate": 9.988974398895802e-06, + "loss": 0.6307, "step": 712 }, { - "epoch": 0.08, - "grad_norm": 2.332076932637489, - "learning_rate": 9.947137271536784e-06, - "loss": 0.6781, + "epoch": 0.05, + "grad_norm": 1.8256973729041028, + "learning_rate": 9.988897993446255e-06, + "loss": 0.5723, "step": 713 }, { - "epoch": 0.08, - "grad_norm": 2.3484004490599073, - "learning_rate": 9.94688982078794e-06, - "loss": 0.6904, + "epoch": 0.05, + "grad_norm": 2.2457931487501672, + "learning_rate": 9.988821324466212e-06, + "loss": 0.5963, "step": 714 }, { - "epoch": 0.08, - "grad_norm": 2.1432128787457025, - "learning_rate": 9.946641795323737e-06, - "loss": 0.7361, + "epoch": 0.05, + "grad_norm": 1.5014812571268752, + "learning_rate": 9.98874439195972e-06, + "loss": 0.5244, "step": 715 }, { - "epoch": 0.08, - "grad_norm": 1.6418533463624745, - "learning_rate": 9.946393195172987e-06, - "loss": 0.63, + "epoch": 0.05, + "grad_norm": 2.0090665835666646, + "learning_rate": 9.988667195930846e-06, + "loss": 0.5999, "step": 716 }, { - "epoch": 0.08, - "grad_norm": 2.2838183539614234, - "learning_rate": 9.946144020364576e-06, - "loss": 0.7464, + "epoch": 0.05, + "grad_norm": 1.7499056050524848, + "learning_rate": 9.988589736383667e-06, + "loss": 0.6345, "step": 717 }, { - "epoch": 0.08, - "grad_norm": 2.955408112768042, - "learning_rate": 9.945894270927452e-06, - "loss": 0.7333, + "epoch": 0.05, + "grad_norm": 2.6706843120371166, + "learning_rate": 9.988512013322274e-06, + "loss": 0.6352, "step": 718 }, { - "epoch": 0.08, - "grad_norm": 2.565202439751648, - "learning_rate": 9.945643946890628e-06, - "loss": 0.6788, + "epoch": 0.05, + "grad_norm": 2.0227458133158205, + "learning_rate": 9.988434026750773e-06, + "loss": 0.5145, "step": 719 }, { - "epoch": 0.08, - "grad_norm": 1.987883523633849, - "learning_rate": 9.945393048283186e-06, - "loss": 0.7369, + "epoch": 0.05, + "grad_norm": 1.109659419861857, + "learning_rate": 9.988355776673284e-06, + "loss": 0.4716, "step": 720 }, { - "epoch": 0.08, - "grad_norm": 2.305645743958634, - "learning_rate": 9.945141575134275e-06, - "loss": 0.8795, + "epoch": 0.05, + "grad_norm": 1.9570468692611225, + "learning_rate": 9.98827726309394e-06, + "loss": 0.5445, "step": 721 }, { - "epoch": 0.08, - "grad_norm": 4.342801724950221, - "learning_rate": 9.944889527473112e-06, - "loss": 0.7749, + "epoch": 0.05, + "grad_norm": 2.3009962967242723, + "learning_rate": 9.988198486016888e-06, + "loss": 0.5733, "step": 722 }, { - "epoch": 0.08, - "grad_norm": 2.6867592587630953, - "learning_rate": 9.944636905328977e-06, - "loss": 0.7122, + "epoch": 0.05, + "grad_norm": 1.8230708768771242, + "learning_rate": 9.988119445446291e-06, + "loss": 0.5244, "step": 723 }, { - "epoch": 0.08, - "grad_norm": 2.7963278085386087, - "learning_rate": 9.94438370873122e-06, - "loss": 0.7286, + "epoch": 0.05, + "grad_norm": 4.791741033426039, + "learning_rate": 9.988040141386322e-06, + "loss": 0.6092, "step": 724 }, { - "epoch": 0.08, - "grad_norm": 3.0500060514327645, - "learning_rate": 9.944129937709255e-06, - "loss": 0.8238, + "epoch": 0.05, + "grad_norm": 2.545432878025286, + "learning_rate": 9.98796057384117e-06, + "loss": 0.6339, "step": 725 }, { - "epoch": 0.08, - "grad_norm": 2.518747853267898, - "learning_rate": 9.943875592292569e-06, - "loss": 0.7468, + "epoch": 0.05, + "grad_norm": 2.5015504078125064, + "learning_rate": 9.98788074281504e-06, + "loss": 0.575, "step": 726 }, { - "epoch": 0.08, - "grad_norm": 2.3307749408071023, - "learning_rate": 9.943620672510706e-06, - "loss": 0.784, + "epoch": 0.05, + "grad_norm": 1.8551388928328785, + "learning_rate": 9.987800648312146e-06, + "loss": 0.6201, "step": 727 }, { - "epoch": 0.08, - "grad_norm": 2.2927298280547608, - "learning_rate": 9.943365178393283e-06, - "loss": 0.6643, + "epoch": 0.05, + "grad_norm": 2.486619442839847, + "learning_rate": 9.987720290336725e-06, + "loss": 0.5868, "step": 728 }, { - "epoch": 0.08, - "grad_norm": 5.413030343240209, - "learning_rate": 9.943109109969985e-06, - "loss": 0.7718, + "epoch": 0.05, + "grad_norm": 1.9814906054947357, + "learning_rate": 9.987639668893015e-06, + "loss": 0.6283, "step": 729 }, { - "epoch": 0.08, - "grad_norm": 1.985911781478991, - "learning_rate": 9.94285246727056e-06, - "loss": 0.7437, + "epoch": 0.05, + "grad_norm": 1.6182202843582136, + "learning_rate": 9.987558783985275e-06, + "loss": 0.5799, "step": 730 }, { - "epoch": 0.08, - "grad_norm": 2.8515953816737416, - "learning_rate": 9.942595250324823e-06, - "loss": 0.7, + "epoch": 0.05, + "grad_norm": 1.6898071419434377, + "learning_rate": 9.987477635617783e-06, + "loss": 0.5757, "step": 731 }, { - "epoch": 0.08, - "grad_norm": 2.567107917615768, - "learning_rate": 9.942337459162657e-06, - "loss": 0.6874, + "epoch": 0.05, + "grad_norm": 1.8218352562318085, + "learning_rate": 9.987396223794822e-06, + "loss": 0.6575, "step": 732 }, { - "epoch": 0.08, - "grad_norm": 2.389536459691384, - "learning_rate": 9.942079093814012e-06, - "loss": 0.7786, + "epoch": 0.05, + "grad_norm": 1.6672154323012864, + "learning_rate": 9.987314548520693e-06, + "loss": 0.5817, "step": 733 }, { - "epoch": 0.08, - "grad_norm": 2.456329041193247, - "learning_rate": 9.941820154308905e-06, - "loss": 0.6675, + "epoch": 0.05, + "grad_norm": 1.836994107573075, + "learning_rate": 9.987232609799709e-06, + "loss": 0.5299, "step": 734 }, { - "epoch": 0.08, - "grad_norm": 2.2902527313975454, - "learning_rate": 9.941560640677417e-06, - "loss": 0.7431, + "epoch": 0.05, + "grad_norm": 1.6276611932796248, + "learning_rate": 9.9871504076362e-06, + "loss": 0.5623, "step": 735 }, { - "epoch": 0.08, - "grad_norm": 1.9299279456459286, - "learning_rate": 9.941300552949697e-06, - "loss": 0.7444, + "epoch": 0.05, + "grad_norm": 2.219553753507082, + "learning_rate": 9.987067942034507e-06, + "loss": 0.5874, "step": 736 }, { - "epoch": 0.08, - "grad_norm": 2.894177677033512, - "learning_rate": 9.941039891155964e-06, - "loss": 0.6389, + "epoch": 0.05, + "grad_norm": 2.069500168938439, + "learning_rate": 9.98698521299899e-06, + "loss": 0.6126, "step": 737 }, { - "epoch": 0.08, - "grad_norm": 2.1178139348238716, - "learning_rate": 9.940778655326499e-06, - "loss": 0.7812, + "epoch": 0.05, + "grad_norm": 1.286709045343945, + "learning_rate": 9.986902220534011e-06, + "loss": 0.4744, "step": 738 }, { - "epoch": 0.08, - "grad_norm": 2.6341684151897957, - "learning_rate": 9.940516845491653e-06, - "loss": 0.7911, + "epoch": 0.05, + "grad_norm": 1.7888718474800211, + "learning_rate": 9.986818964643963e-06, + "loss": 0.5514, "step": 739 }, { - "epoch": 0.08, - "grad_norm": 2.3572249944985764, - "learning_rate": 9.940254461681841e-06, - "loss": 0.7095, + "epoch": 0.05, + "grad_norm": 1.8180110567142151, + "learning_rate": 9.986735445333238e-06, + "loss": 0.6424, "step": 740 }, { - "epoch": 0.08, - "grad_norm": 2.392416707308911, - "learning_rate": 9.939991503927548e-06, - "loss": 0.7532, + "epoch": 0.05, + "grad_norm": 1.7807572474024047, + "learning_rate": 9.986651662606248e-06, + "loss": 0.6102, "step": 741 }, { - "epoch": 0.08, - "grad_norm": 2.892105469013813, - "learning_rate": 9.939727972259321e-06, - "loss": 0.8392, + "epoch": 0.05, + "grad_norm": 1.8021843905368713, + "learning_rate": 9.986567616467421e-06, + "loss": 0.5764, "step": 742 }, { - "epoch": 0.08, - "grad_norm": 2.5792946557866454, - "learning_rate": 9.939463866707777e-06, - "loss": 0.7718, + "epoch": 0.05, + "grad_norm": 2.533005401427882, + "learning_rate": 9.986483306921196e-06, + "loss": 0.5939, "step": 743 }, { - "epoch": 0.08, - "grad_norm": 2.382428104409083, - "learning_rate": 9.939199187303598e-06, - "loss": 0.6834, + "epoch": 0.05, + "grad_norm": 1.7168049776532563, + "learning_rate": 9.986398733972027e-06, + "loss": 0.5594, "step": 744 }, { - "epoch": 0.08, - "grad_norm": 2.963009035348591, - "learning_rate": 9.938933934077539e-06, - "loss": 0.749, + "epoch": 0.05, + "grad_norm": 10.65973041477218, + "learning_rate": 9.98631389762438e-06, + "loss": 0.5972, "step": 745 }, { - "epoch": 0.08, - "grad_norm": 2.3169431674281125, - "learning_rate": 9.93866810706041e-06, - "loss": 0.7598, + "epoch": 0.05, + "grad_norm": 1.7352238947190146, + "learning_rate": 9.986228797882737e-06, + "loss": 0.6136, "step": 746 }, { - "epoch": 0.08, - "grad_norm": 2.269068621083456, - "learning_rate": 9.938401706283096e-06, - "loss": 0.6919, + "epoch": 0.05, + "grad_norm": 1.942070525514916, + "learning_rate": 9.986143434751593e-06, + "loss": 0.5834, "step": 747 }, { - "epoch": 0.08, - "grad_norm": 4.520110943561689, - "learning_rate": 9.93813473177655e-06, - "loss": 0.7574, + "epoch": 0.05, + "grad_norm": 1.8640144232278262, + "learning_rate": 9.986057808235457e-06, + "loss": 0.5679, "step": 748 }, { - "epoch": 0.08, - "grad_norm": 2.6705782110520553, - "learning_rate": 9.937867183571784e-06, - "loss": 0.6852, + "epoch": 0.05, + "grad_norm": 0.9611081640237724, + "learning_rate": 9.985971918338854e-06, + "loss": 0.472, "step": 749 }, { - "epoch": 0.08, - "grad_norm": 2.394466270287015, - "learning_rate": 9.93759906169988e-06, - "loss": 0.8214, + "epoch": 0.05, + "grad_norm": 2.560660193625534, + "learning_rate": 9.985885765066319e-06, + "loss": 0.5696, "step": 750 }, { - "epoch": 0.08, - "grad_norm": 2.4103257753815956, - "learning_rate": 9.937330366191994e-06, - "loss": 0.7784, + "epoch": 0.05, + "grad_norm": 2.4607444406644423, + "learning_rate": 9.985799348422403e-06, + "loss": 0.5839, "step": 751 }, { - "epoch": 0.08, - "grad_norm": 1.2399480859042666, - "learning_rate": 9.937061097079337e-06, - "loss": 0.6601, + "epoch": 0.05, + "grad_norm": 1.9314279980753932, + "learning_rate": 9.98571266841167e-06, + "loss": 0.624, "step": 752 }, { - "epoch": 0.08, - "grad_norm": 3.0481316936580254, - "learning_rate": 9.936791254393193e-06, - "loss": 0.7356, + "epoch": 0.05, + "grad_norm": 2.5415247716604976, + "learning_rate": 9.985625725038702e-06, + "loss": 0.5845, "step": 753 }, { - "epoch": 0.08, - "grad_norm": 2.437583015333378, - "learning_rate": 9.936520838164912e-06, - "loss": 0.7158, + "epoch": 0.05, + "grad_norm": 3.7644850042550524, + "learning_rate": 9.985538518308088e-06, + "loss": 0.6256, "step": 754 }, { - "epoch": 0.08, - "grad_norm": 2.707441716431831, - "learning_rate": 9.93624984842591e-06, - "loss": 0.7215, + "epoch": 0.05, + "grad_norm": 1.6918918476164666, + "learning_rate": 9.985451048224437e-06, + "loss": 0.5622, "step": 755 }, { - "epoch": 0.08, - "grad_norm": 4.532030304474135, - "learning_rate": 9.93597828520767e-06, - "loss": 0.753, + "epoch": 0.05, + "grad_norm": 2.1889029756086686, + "learning_rate": 9.98536331479237e-06, + "loss": 0.6217, "step": 756 }, { - "epoch": 0.08, - "grad_norm": 2.7461971545571786, - "learning_rate": 9.935706148541742e-06, - "loss": 0.7097, + "epoch": 0.05, + "grad_norm": 3.040684723508936, + "learning_rate": 9.985275318016517e-06, + "loss": 0.6269, "step": 757 }, { - "epoch": 0.08, - "grad_norm": 2.3627790545916216, - "learning_rate": 9.93543343845974e-06, - "loss": 0.8169, + "epoch": 0.05, + "grad_norm": 1.891838483347082, + "learning_rate": 9.98518705790153e-06, + "loss": 0.6704, "step": 758 }, { - "epoch": 0.08, - "grad_norm": 2.6693014549496814, - "learning_rate": 9.93516015499335e-06, - "loss": 0.8296, + "epoch": 0.05, + "grad_norm": 2.523225671008834, + "learning_rate": 9.985098534452074e-06, + "loss": 0.6696, "step": 759 }, { - "epoch": 0.08, - "grad_norm": 3.7105561050330222, - "learning_rate": 9.934886298174317e-06, - "loss": 0.7099, + "epoch": 0.05, + "grad_norm": 2.144537749471743, + "learning_rate": 9.985009747672818e-06, + "loss": 0.5719, "step": 760 }, { - "epoch": 0.08, - "grad_norm": 2.720683329507592, - "learning_rate": 9.93461186803446e-06, - "loss": 0.7632, + "epoch": 0.05, + "grad_norm": 1.8199455724926854, + "learning_rate": 9.984920697568457e-06, + "loss": 0.5347, "step": 761 }, { - "epoch": 0.08, - "grad_norm": 2.588291193800891, - "learning_rate": 9.934336864605663e-06, - "loss": 0.7789, + "epoch": 0.05, + "grad_norm": 1.717797303334601, + "learning_rate": 9.984831384143692e-06, + "loss": 0.5751, "step": 762 }, { - "epoch": 0.08, - "grad_norm": 2.5585071173949863, - "learning_rate": 9.934061287919869e-06, - "loss": 0.6833, + "epoch": 0.05, + "grad_norm": 1.924909989349552, + "learning_rate": 9.984741807403244e-06, + "loss": 0.5579, "step": 763 }, { - "epoch": 0.08, - "grad_norm": 2.2895773779403195, - "learning_rate": 9.9337851380091e-06, - "loss": 0.7367, + "epoch": 0.05, + "grad_norm": 1.8416242690569073, + "learning_rate": 9.984651967351842e-06, + "loss": 0.592, "step": 764 }, { - "epoch": 0.08, - "grad_norm": 3.0219975702999697, - "learning_rate": 9.933508414905434e-06, - "loss": 0.7717, + "epoch": 0.05, + "grad_norm": 1.8378304445124731, + "learning_rate": 9.984561863994234e-06, + "loss": 0.6012, "step": 765 }, { - "epoch": 0.08, - "grad_norm": 2.6270513907040094, - "learning_rate": 9.933231118641025e-06, - "loss": 0.6998, + "epoch": 0.05, + "grad_norm": 2.146671289848264, + "learning_rate": 9.984471497335178e-06, + "loss": 0.6279, "step": 766 }, { - "epoch": 0.08, - "grad_norm": 2.620673953552405, - "learning_rate": 9.932953249248082e-06, - "loss": 0.833, + "epoch": 0.05, + "grad_norm": 1.2249572230606032, + "learning_rate": 9.984380867379447e-06, + "loss": 0.4866, "step": 767 }, { - "epoch": 0.08, - "grad_norm": 2.5153078631067154, - "learning_rate": 9.93267480675889e-06, - "loss": 0.7723, + "epoch": 0.05, + "grad_norm": 1.8470411036855674, + "learning_rate": 9.984289974131829e-06, + "loss": 0.5712, "step": 768 }, { - "epoch": 0.08, - "grad_norm": 2.951354250369123, - "learning_rate": 9.9323957912058e-06, - "loss": 0.7502, + "epoch": 0.05, + "grad_norm": 0.9512314754598917, + "learning_rate": 9.984198817597126e-06, + "loss": 0.464, "step": 769 }, { - "epoch": 0.08, - "grad_norm": 1.4394149168178134, - "learning_rate": 9.932116202621224e-06, - "loss": 0.6457, + "epoch": 0.05, + "grad_norm": 6.29376042153947, + "learning_rate": 9.984107397780154e-06, + "loss": 0.6041, "step": 770 }, { - "epoch": 0.08, - "grad_norm": 2.1095938889246026, - "learning_rate": 9.931836041037644e-06, - "loss": 0.7061, + "epoch": 0.05, + "grad_norm": 1.9560030189945388, + "learning_rate": 9.984015714685739e-06, + "loss": 0.5777, "step": 771 }, { - "epoch": 0.08, - "grad_norm": 2.6819242824234957, - "learning_rate": 9.931555306487612e-06, - "loss": 0.7691, + "epoch": 0.05, + "grad_norm": 0.9920985477251776, + "learning_rate": 9.983923768318727e-06, + "loss": 0.4802, "step": 772 }, { - "epoch": 0.08, - "grad_norm": 2.2043376051832424, - "learning_rate": 9.931273999003738e-06, - "loss": 0.6946, + "epoch": 0.05, + "grad_norm": 1.9873330441301158, + "learning_rate": 9.983831558683971e-06, + "loss": 0.647, "step": 773 }, { - "epoch": 0.08, - "grad_norm": 2.6902924884956425, - "learning_rate": 9.930992118618706e-06, - "loss": 0.6695, + "epoch": 0.05, + "grad_norm": 1.7994735729841338, + "learning_rate": 9.983739085786347e-06, + "loss": 0.5465, "step": 774 }, { - "epoch": 0.08, - "grad_norm": 2.8989924651843326, - "learning_rate": 9.930709665365264e-06, - "loss": 0.7694, + "epoch": 0.05, + "grad_norm": 2.1591862062181253, + "learning_rate": 9.983646349630738e-06, + "loss": 0.6523, "step": 775 }, { - "epoch": 0.08, - "grad_norm": 2.224303330241575, - "learning_rate": 9.930426639276225e-06, - "loss": 0.7487, + "epoch": 0.06, + "grad_norm": 0.842681928469058, + "learning_rate": 9.983553350222042e-06, + "loss": 0.4655, "step": 776 }, { - "epoch": 0.08, - "grad_norm": 2.6983836303838826, - "learning_rate": 9.930143040384472e-06, - "loss": 0.7523, + "epoch": 0.06, + "grad_norm": 0.93618269982594, + "learning_rate": 9.983460087565169e-06, + "loss": 0.4398, "step": 777 }, { - "epoch": 0.08, - "grad_norm": 2.3780230591194065, - "learning_rate": 9.929858868722954e-06, - "loss": 0.7692, + "epoch": 0.06, + "grad_norm": 1.732337076785266, + "learning_rate": 9.983366561665049e-06, + "loss": 0.5877, "step": 778 }, { - "epoch": 0.08, - "grad_norm": 2.4634887523343942, - "learning_rate": 9.929574124324682e-06, - "loss": 0.8036, + "epoch": 0.06, + "grad_norm": 1.9580441152640422, + "learning_rate": 9.983272772526621e-06, + "loss": 0.5928, "step": 779 }, { - "epoch": 0.08, - "grad_norm": 2.2694290012543052, - "learning_rate": 9.929288807222738e-06, - "loss": 0.7424, + "epoch": 0.06, + "grad_norm": 3.2504826064327057, + "learning_rate": 9.98317872015484e-06, + "loss": 0.6113, "step": 780 }, { - "epoch": 0.08, - "grad_norm": 2.6592362586407394, - "learning_rate": 9.92900291745027e-06, - "loss": 0.6375, + "epoch": 0.06, + "grad_norm": 1.947286097097674, + "learning_rate": 9.983084404554674e-06, + "loss": 0.6242, "step": 781 }, { - "epoch": 0.08, - "grad_norm": 3.495897539805403, - "learning_rate": 9.92871645504049e-06, - "loss": 0.7854, + "epoch": 0.06, + "grad_norm": 2.225099355263732, + "learning_rate": 9.982989825731106e-06, + "loss": 0.6366, "step": 782 }, { - "epoch": 0.08, - "grad_norm": 2.0961963685516727, - "learning_rate": 9.928429420026682e-06, - "loss": 0.7634, + "epoch": 0.06, + "grad_norm": 2.0629680222217424, + "learning_rate": 9.982894983689129e-06, + "loss": 0.6226, "step": 783 }, { - "epoch": 0.08, - "grad_norm": 2.8509364984526253, - "learning_rate": 9.92814181244219e-06, - "loss": 0.6458, + "epoch": 0.06, + "grad_norm": 1.7648291403420113, + "learning_rate": 9.982799878433754e-06, + "loss": 0.536, "step": 784 }, { - "epoch": 0.08, - "grad_norm": 2.8241576523218956, - "learning_rate": 9.927853632320427e-06, - "loss": 0.7804, + "epoch": 0.06, + "grad_norm": 1.725067037334444, + "learning_rate": 9.982704509970007e-06, + "loss": 0.5591, "step": 785 }, { - "epoch": 0.08, - "grad_norm": 3.0001225664364055, - "learning_rate": 9.927564879694874e-06, - "loss": 0.7229, + "epoch": 0.06, + "grad_norm": 2.9583644776020774, + "learning_rate": 9.982608878302924e-06, + "loss": 0.6604, "step": 786 }, { - "epoch": 0.08, - "grad_norm": 3.4475528816764944, - "learning_rate": 9.927275554599078e-06, - "loss": 0.8362, + "epoch": 0.06, + "grad_norm": 1.8682453386288844, + "learning_rate": 9.982512983437555e-06, + "loss": 0.5625, "step": 787 }, { - "epoch": 0.08, - "grad_norm": 2.2308232356508526, - "learning_rate": 9.926985657066653e-06, - "loss": 0.7114, + "epoch": 0.06, + "grad_norm": 3.162372561126446, + "learning_rate": 9.982416825378968e-06, + "loss": 0.5895, "step": 788 }, { - "epoch": 0.08, - "grad_norm": 2.341682432680137, - "learning_rate": 9.926695187131275e-06, - "loss": 0.7783, + "epoch": 0.06, + "grad_norm": 1.1101950242741496, + "learning_rate": 9.982320404132242e-06, + "loss": 0.4596, "step": 789 }, { - "epoch": 0.08, - "grad_norm": 2.6052199283711874, - "learning_rate": 9.92640414482669e-06, - "loss": 0.8333, + "epoch": 0.06, + "grad_norm": 2.4882629568452423, + "learning_rate": 9.982223719702469e-06, + "loss": 0.5899, "step": 790 }, { - "epoch": 0.08, - "grad_norm": 2.8370875665068858, - "learning_rate": 9.926112530186715e-06, - "loss": 0.7107, + "epoch": 0.06, + "grad_norm": 3.6303379454036557, + "learning_rate": 9.982126772094758e-06, + "loss": 0.594, "step": 791 }, { - "epoch": 0.08, - "grad_norm": 2.5275739897207865, - "learning_rate": 9.925820343245225e-06, - "loss": 0.7965, + "epoch": 0.06, + "grad_norm": 3.672779172491735, + "learning_rate": 9.982029561314227e-06, + "loss": 0.5792, "step": 792 }, { - "epoch": 0.08, - "grad_norm": 2.241333301690011, - "learning_rate": 9.925527584036167e-06, - "loss": 0.8081, + "epoch": 0.06, + "grad_norm": 1.9294736206741199, + "learning_rate": 9.981932087366017e-06, + "loss": 0.593, "step": 793 }, { - "epoch": 0.08, - "grad_norm": 2.1980959634447164, - "learning_rate": 9.925234252593554e-06, - "loss": 0.7231, + "epoch": 0.06, + "grad_norm": 0.9767696875647004, + "learning_rate": 9.98183435025527e-06, + "loss": 0.4484, "step": 794 }, { - "epoch": 0.08, - "grad_norm": 2.5031300722496965, - "learning_rate": 9.92494034895146e-06, - "loss": 0.6994, + "epoch": 0.06, + "grad_norm": 1.7538415628753308, + "learning_rate": 9.981736349987153e-06, + "loss": 0.5662, "step": 795 }, { - "epoch": 0.08, - "grad_norm": 2.1430334991183604, - "learning_rate": 9.924645873144035e-06, - "loss": 0.6908, + "epoch": 0.06, + "grad_norm": 1.687466774691323, + "learning_rate": 9.98163808656684e-06, + "loss": 0.6094, "step": 796 }, { - "epoch": 0.08, - "grad_norm": 2.350620970434163, - "learning_rate": 9.924350825205487e-06, - "loss": 0.8007, + "epoch": 0.06, + "grad_norm": 2.0650637404171346, + "learning_rate": 9.981539559999525e-06, + "loss": 0.5608, "step": 797 }, { - "epoch": 0.08, - "grad_norm": 2.579066826686899, - "learning_rate": 9.924055205170095e-06, - "loss": 0.8572, + "epoch": 0.06, + "grad_norm": 2.187483416040044, + "learning_rate": 9.981440770290412e-06, + "loss": 0.6347, "step": 798 }, { - "epoch": 0.08, - "grad_norm": 2.9358520387744473, - "learning_rate": 9.923759013072205e-06, - "loss": 0.7289, + "epoch": 0.06, + "grad_norm": 2.353329825927032, + "learning_rate": 9.981341717444715e-06, + "loss": 0.6329, "step": 799 }, { - "epoch": 0.08, - "grad_norm": 1.4595413230957774, - "learning_rate": 9.923462248946224e-06, - "loss": 0.6404, + "epoch": 0.06, + "grad_norm": 2.272046913463745, + "learning_rate": 9.98124240146767e-06, + "loss": 0.6088, "step": 800 }, { - "epoch": 0.08, - "grad_norm": 2.25406590521839, - "learning_rate": 9.923164912826631e-06, - "loss": 0.7915, + "epoch": 0.06, + "grad_norm": 2.0699615660476876, + "learning_rate": 9.981142822364524e-06, + "loss": 0.6733, "step": 801 }, { - "epoch": 0.08, - "grad_norm": 2.5320545260007727, - "learning_rate": 9.922867004747971e-06, - "loss": 0.7355, + "epoch": 0.06, + "grad_norm": 1.8126499471437967, + "learning_rate": 9.981042980140536e-06, + "loss": 0.5618, "step": 802 }, { - "epoch": 0.08, - "grad_norm": 1.0795033707901633, - "learning_rate": 9.922568524744854e-06, - "loss": 0.6239, + "epoch": 0.06, + "grad_norm": 1.7641807829159009, + "learning_rate": 9.980942874800979e-06, + "loss": 0.6154, "step": 803 }, { - "epoch": 0.08, - "grad_norm": 5.1305263819984, - "learning_rate": 9.922269472851953e-06, - "loss": 0.7992, + "epoch": 0.06, + "grad_norm": 2.160356581384707, + "learning_rate": 9.98084250635114e-06, + "loss": 0.6402, "step": 804 }, { - "epoch": 0.08, - "grad_norm": 2.4229395578571924, - "learning_rate": 9.921969849104015e-06, - "loss": 0.686, + "epoch": 0.06, + "grad_norm": 1.8193463519286226, + "learning_rate": 9.980741874796324e-06, + "loss": 0.5563, "step": 805 }, { - "epoch": 0.08, - "grad_norm": 2.925961640710328, - "learning_rate": 9.921669653535848e-06, - "loss": 0.7174, + "epoch": 0.06, + "grad_norm": 0.9709661037978785, + "learning_rate": 9.980640980141844e-06, + "loss": 0.4513, "step": 806 }, { - "epoch": 0.08, - "grad_norm": 2.77414781814395, - "learning_rate": 9.921368886182328e-06, - "loss": 0.7249, + "epoch": 0.06, + "grad_norm": 1.7775913205613263, + "learning_rate": 9.980539822393032e-06, + "loss": 0.5215, "step": 807 }, { - "epoch": 0.09, - "grad_norm": 2.732166312681211, - "learning_rate": 9.921067547078396e-06, - "loss": 0.7682, + "epoch": 0.06, + "grad_norm": 0.8514190717792197, + "learning_rate": 9.980438401555231e-06, + "loss": 0.482, "step": 808 }, { - "epoch": 0.09, - "grad_norm": 2.6581746298192357, - "learning_rate": 9.920765636259062e-06, - "loss": 0.7427, + "epoch": 0.06, + "grad_norm": 1.9522669354872004, + "learning_rate": 9.980336717633796e-06, + "loss": 0.6854, "step": 809 }, { - "epoch": 0.09, - "grad_norm": 3.420618373750652, - "learning_rate": 9.9204631537594e-06, - "loss": 0.8256, + "epoch": 0.06, + "grad_norm": 2.2071671007448836, + "learning_rate": 9.9802347706341e-06, + "loss": 0.6123, "step": 810 }, { - "epoch": 0.09, - "grad_norm": 4.452433300784335, - "learning_rate": 9.920160099614553e-06, - "loss": 0.7742, + "epoch": 0.06, + "grad_norm": 1.8343281883272198, + "learning_rate": 9.980132560561528e-06, + "loss": 0.7338, "step": 811 }, { - "epoch": 0.09, - "grad_norm": 2.347854862834246, - "learning_rate": 9.91985647385973e-06, - "loss": 0.6981, + "epoch": 0.06, + "grad_norm": 1.9079408401876257, + "learning_rate": 9.980030087421479e-06, + "loss": 0.5817, "step": 812 }, { - "epoch": 0.09, - "grad_norm": 2.335973187712152, - "learning_rate": 9.919552276530202e-06, - "loss": 0.6822, + "epoch": 0.06, + "grad_norm": 1.6192142427880043, + "learning_rate": 9.979927351219367e-06, + "loss": 0.5169, "step": 813 }, { - "epoch": 0.09, - "grad_norm": 3.0899513351336645, - "learning_rate": 9.919247507661313e-06, - "loss": 0.6554, + "epoch": 0.06, + "grad_norm": 2.5112111558540344, + "learning_rate": 9.979824351960617e-06, + "loss": 0.6184, "step": 814 }, { - "epoch": 0.09, - "grad_norm": 2.4579977944848657, - "learning_rate": 9.918942167288467e-06, - "loss": 0.7347, + "epoch": 0.06, + "grad_norm": 2.362258064545396, + "learning_rate": 9.979721089650671e-06, + "loss": 0.6368, "step": 815 }, { - "epoch": 0.09, - "grad_norm": 2.2321378141123316, - "learning_rate": 9.918636255447141e-06, - "loss": 0.7083, + "epoch": 0.06, + "grad_norm": 1.9615972940113384, + "learning_rate": 9.979617564294986e-06, + "loss": 0.6112, "step": 816 }, { - "epoch": 0.09, - "grad_norm": 2.337678644880633, - "learning_rate": 9.918329772172872e-06, - "loss": 0.7353, + "epoch": 0.06, + "grad_norm": 1.7435099176695448, + "learning_rate": 9.979513775899024e-06, + "loss": 0.5284, "step": 817 }, { - "epoch": 0.09, - "grad_norm": 2.484233611654691, - "learning_rate": 9.918022717501268e-06, - "loss": 0.7626, + "epoch": 0.06, + "grad_norm": 1.8021295795921246, + "learning_rate": 9.979409724468274e-06, + "loss": 0.5221, "step": 818 }, { - "epoch": 0.09, - "grad_norm": 2.6233513206248698, - "learning_rate": 9.917715091467999e-06, - "loss": 0.7422, + "epoch": 0.06, + "grad_norm": 1.6482342150238698, + "learning_rate": 9.97930541000823e-06, + "loss": 0.564, "step": 819 }, { - "epoch": 0.09, - "grad_norm": 2.2460279378058137, - "learning_rate": 9.91740689410881e-06, - "loss": 0.8034, + "epoch": 0.06, + "grad_norm": 2.451519029084754, + "learning_rate": 9.9792008325244e-06, + "loss": 0.6594, "step": 820 }, { - "epoch": 0.09, - "grad_norm": 3.014666827028448, - "learning_rate": 9.917098125459501e-06, - "loss": 0.7201, + "epoch": 0.06, + "grad_norm": 2.1937804630356323, + "learning_rate": 9.979095992022311e-06, + "loss": 0.589, "step": 821 }, { - "epoch": 0.09, - "grad_norm": 2.8852937872527904, - "learning_rate": 9.916788785555945e-06, - "loss": 0.702, + "epoch": 0.06, + "grad_norm": 1.790033522649253, + "learning_rate": 9.978990888507502e-06, + "loss": 0.6604, "step": 822 }, { - "epoch": 0.09, - "grad_norm": 2.468109581522842, - "learning_rate": 9.91647887443408e-06, - "loss": 0.7292, + "epoch": 0.06, + "grad_norm": 1.9220007754551103, + "learning_rate": 9.978885521985522e-06, + "loss": 0.616, "step": 823 }, { - "epoch": 0.09, - "grad_norm": 2.0137798678101095, - "learning_rate": 9.916168392129914e-06, - "loss": 0.7081, + "epoch": 0.06, + "grad_norm": 2.171765350694403, + "learning_rate": 9.978779892461936e-06, + "loss": 0.5481, "step": 824 }, { - "epoch": 0.09, - "grad_norm": 2.1356848172886997, - "learning_rate": 9.915857338679515e-06, - "loss": 0.7477, + "epoch": 0.06, + "grad_norm": 1.9508105940602367, + "learning_rate": 9.978673999942328e-06, + "loss": 0.6372, "step": 825 }, { - "epoch": 0.09, - "grad_norm": 2.146941252431847, - "learning_rate": 9.91554571411902e-06, - "loss": 0.7231, + "epoch": 0.06, + "grad_norm": 1.8989133394027413, + "learning_rate": 9.97856784443229e-06, + "loss": 0.6587, "step": 826 }, { - "epoch": 0.09, - "grad_norm": 1.967160075670586, - "learning_rate": 9.915233518484633e-06, - "loss": 0.7984, + "epoch": 0.06, + "grad_norm": 2.4652631765140485, + "learning_rate": 9.978461425937427e-06, + "loss": 0.6225, "step": 827 }, { - "epoch": 0.09, - "grad_norm": 2.0751156185412385, - "learning_rate": 9.914920751812626e-06, - "loss": 0.8102, + "epoch": 0.06, + "grad_norm": 1.8014241908131035, + "learning_rate": 9.978354744463361e-06, + "loss": 0.6246, "step": 828 }, { - "epoch": 0.09, - "grad_norm": 2.3472721595318586, - "learning_rate": 9.914607414139332e-06, - "loss": 0.7313, + "epoch": 0.06, + "grad_norm": 1.7241092365717274, + "learning_rate": 9.97824780001573e-06, + "loss": 0.5554, "step": 829 }, { - "epoch": 0.09, - "grad_norm": 2.118986957021893, - "learning_rate": 9.914293505501155e-06, - "loss": 0.8668, + "epoch": 0.06, + "grad_norm": 1.818362165932305, + "learning_rate": 9.978140592600182e-06, + "loss": 0.6395, "step": 830 }, { - "epoch": 0.09, - "grad_norm": 2.394587519870367, - "learning_rate": 9.913979025934566e-06, - "loss": 0.7498, + "epoch": 0.06, + "grad_norm": 2.278539916827366, + "learning_rate": 9.978033122222379e-06, + "loss": 0.5357, "step": 831 }, { - "epoch": 0.09, - "grad_norm": 3.0206794969562405, - "learning_rate": 9.913663975476099e-06, - "loss": 0.7633, + "epoch": 0.06, + "grad_norm": 2.0268939508869095, + "learning_rate": 9.977925388888e-06, + "loss": 0.6488, "step": 832 }, { - "epoch": 0.09, - "grad_norm": 2.096598722449407, - "learning_rate": 9.913348354162353e-06, - "loss": 0.7422, + "epoch": 0.06, + "grad_norm": 2.6293847185169166, + "learning_rate": 9.977817392602733e-06, + "loss": 0.7021, "step": 833 }, { - "epoch": 0.09, - "grad_norm": 2.4996308686533895, - "learning_rate": 9.913032162029999e-06, - "loss": 0.7345, + "epoch": 0.06, + "grad_norm": 2.0052066649871674, + "learning_rate": 9.977709133372284e-06, + "loss": 0.6196, "step": 834 }, { - "epoch": 0.09, - "grad_norm": 2.692833005695984, - "learning_rate": 9.91271539911577e-06, - "loss": 0.783, + "epoch": 0.06, + "grad_norm": 1.1784984272427252, + "learning_rate": 9.977600611202372e-06, + "loss": 0.4805, "step": 835 }, { - "epoch": 0.09, - "grad_norm": 3.1228836691300037, - "learning_rate": 9.91239806545647e-06, - "loss": 0.7425, + "epoch": 0.06, + "grad_norm": 1.84166732277992, + "learning_rate": 9.97749182609873e-06, + "loss": 0.5855, "step": 836 }, { - "epoch": 0.09, - "grad_norm": 2.3986505144559427, - "learning_rate": 9.91208016108896e-06, - "loss": 0.7267, + "epoch": 0.06, + "grad_norm": 1.9709655655215204, + "learning_rate": 9.977382778067104e-06, + "loss": 0.6126, "step": 837 }, { - "epoch": 0.09, - "grad_norm": 2.7101167395338273, - "learning_rate": 9.911761686050177e-06, - "loss": 0.7581, + "epoch": 0.06, + "grad_norm": 1.8229458479445437, + "learning_rate": 9.977273467113254e-06, + "loss": 0.6582, "step": 838 }, { - "epoch": 0.09, - "grad_norm": 2.5806365307450667, - "learning_rate": 9.91144264037712e-06, - "loss": 0.7847, + "epoch": 0.06, + "grad_norm": 1.166124119682141, + "learning_rate": 9.977163893242952e-06, + "loss": 0.4634, "step": 839 }, { - "epoch": 0.09, - "grad_norm": 1.5964088279350312, - "learning_rate": 9.911123024106854e-06, - "loss": 0.6742, + "epoch": 0.06, + "grad_norm": 1.9683453676183154, + "learning_rate": 9.97705405646199e-06, + "loss": 0.6159, "step": 840 }, { - "epoch": 0.09, - "grad_norm": 2.0458536491050143, - "learning_rate": 9.910802837276514e-06, - "loss": 0.6898, + "epoch": 0.06, + "grad_norm": 1.8277088811646793, + "learning_rate": 9.976943956776168e-06, + "loss": 0.566, "step": 841 }, { - "epoch": 0.09, - "grad_norm": 1.9738131081216486, - "learning_rate": 9.910482079923293e-06, - "loss": 0.7097, + "epoch": 0.06, + "grad_norm": 1.7589739881682507, + "learning_rate": 9.976833594191301e-06, + "loss": 0.6541, "step": 842 }, { - "epoch": 0.09, - "grad_norm": 4.033940638900864, - "learning_rate": 9.910160752084461e-06, - "loss": 0.6891, + "epoch": 0.06, + "grad_norm": 2.465341610938836, + "learning_rate": 9.976722968713222e-06, + "loss": 0.6112, "step": 843 }, { - "epoch": 0.09, - "grad_norm": 2.493418453706031, - "learning_rate": 9.909838853797347e-06, - "loss": 0.8386, + "epoch": 0.06, + "grad_norm": 2.2799024015877056, + "learning_rate": 9.97661208034777e-06, + "loss": 0.5758, "step": 844 }, { - "epoch": 0.09, - "grad_norm": 2.3609786228834397, - "learning_rate": 9.909516385099346e-06, - "loss": 0.6447, + "epoch": 0.06, + "grad_norm": 1.692080697305744, + "learning_rate": 9.976500929100806e-06, + "loss": 0.625, "step": 845 }, { - "epoch": 0.09, - "grad_norm": 1.8826827313822074, - "learning_rate": 9.909193346027923e-06, - "loss": 0.7634, + "epoch": 0.06, + "grad_norm": 1.6112302465168034, + "learning_rate": 9.9763895149782e-06, + "loss": 0.5957, "step": 846 }, { - "epoch": 0.09, - "grad_norm": 2.192146232328817, - "learning_rate": 9.90886973662061e-06, - "loss": 0.6501, + "epoch": 0.06, + "grad_norm": 1.0915184156344058, + "learning_rate": 9.976277837985838e-06, + "loss": 0.4287, "step": 847 }, { - "epoch": 0.09, - "grad_norm": 4.924566828839797, - "learning_rate": 9.908545556915e-06, - "loss": 0.7382, + "epoch": 0.06, + "grad_norm": 1.6960835734776885, + "learning_rate": 9.976165898129617e-06, + "loss": 0.5701, "step": 848 }, { - "epoch": 0.09, - "grad_norm": 2.124722725213922, - "learning_rate": 9.908220806948755e-06, - "loss": 0.7342, + "epoch": 0.06, + "grad_norm": 1.8209266226111924, + "learning_rate": 9.976053695415453e-06, + "loss": 0.5464, "step": 849 }, { - "epoch": 0.09, - "grad_norm": 3.149040160809967, - "learning_rate": 9.90789548675961e-06, - "loss": 0.769, + "epoch": 0.06, + "grad_norm": 1.8962231714399242, + "learning_rate": 9.975941229849273e-06, + "loss": 0.6345, "step": 850 }, { - "epoch": 0.09, - "grad_norm": 2.016612347468723, - "learning_rate": 9.90756959638535e-06, - "loss": 0.7533, + "epoch": 0.06, + "grad_norm": 2.4347057278624904, + "learning_rate": 9.975828501437015e-06, + "loss": 0.6128, "step": 851 }, { - "epoch": 0.09, - "grad_norm": 2.21890626690869, - "learning_rate": 9.90724313586384e-06, - "loss": 0.7183, + "epoch": 0.06, + "grad_norm": 0.9701608457378855, + "learning_rate": 9.975715510184633e-06, + "loss": 0.4677, "step": 852 }, { - "epoch": 0.09, - "grad_norm": 2.1739377588424182, - "learning_rate": 9.90691610523301e-06, - "loss": 0.725, + "epoch": 0.06, + "grad_norm": 1.826048645610474, + "learning_rate": 9.975602256098101e-06, + "loss": 0.601, "step": 853 }, { - "epoch": 0.09, - "grad_norm": 2.4308506352472565, - "learning_rate": 9.906588504530852e-06, - "loss": 0.7721, + "epoch": 0.06, + "grad_norm": 1.7635323265430154, + "learning_rate": 9.975488739183397e-06, + "loss": 0.5631, "step": 854 }, { - "epoch": 0.09, - "grad_norm": 2.039428986286277, - "learning_rate": 9.906260333795423e-06, - "loss": 0.7862, + "epoch": 0.06, + "grad_norm": 1.7170652814724954, + "learning_rate": 9.975374959446518e-06, + "loss": 0.5358, "step": 855 }, { - "epoch": 0.09, - "grad_norm": 2.072889277401122, - "learning_rate": 9.905931593064852e-06, - "loss": 0.7057, + "epoch": 0.06, + "grad_norm": 1.82466761165953, + "learning_rate": 9.975260916893477e-06, + "loss": 0.5581, "step": 856 }, { - "epoch": 0.09, - "grad_norm": 2.5603746952912583, - "learning_rate": 9.905602282377331e-06, - "loss": 0.7845, + "epoch": 0.06, + "grad_norm": 1.6088892246849174, + "learning_rate": 9.975146611530293e-06, + "loss": 0.5251, "step": 857 }, { - "epoch": 0.09, - "grad_norm": 2.2094754050609255, - "learning_rate": 9.905272401771115e-06, - "loss": 0.6726, + "epoch": 0.06, + "grad_norm": 4.317816069250075, + "learning_rate": 9.97503204336301e-06, + "loss": 0.5963, "step": 858 }, { - "epoch": 0.09, - "grad_norm": 2.010211257078767, - "learning_rate": 9.904941951284535e-06, - "loss": 0.82, + "epoch": 0.06, + "grad_norm": 1.6900684818219267, + "learning_rate": 9.974917212397674e-06, + "loss": 0.6305, "step": 859 }, { - "epoch": 0.09, - "grad_norm": 2.4294590301163907, - "learning_rate": 9.904610930955975e-06, - "loss": 0.7225, + "epoch": 0.06, + "grad_norm": 2.025470447529022, + "learning_rate": 9.974802118640354e-06, + "loss": 0.5842, "step": 860 }, { - "epoch": 0.09, - "grad_norm": 2.4545417076156157, - "learning_rate": 9.904279340823895e-06, - "loss": 0.7379, + "epoch": 0.06, + "grad_norm": 2.021441255022368, + "learning_rate": 9.97468676209713e-06, + "loss": 0.5827, "step": 861 }, { - "epoch": 0.09, - "grad_norm": 2.0489546331766704, - "learning_rate": 9.903947180926819e-06, - "loss": 0.7939, + "epoch": 0.06, + "grad_norm": 1.8610307300944973, + "learning_rate": 9.974571142774095e-06, + "loss": 0.5934, "step": 862 }, { - "epoch": 0.09, - "grad_norm": 4.328322416011042, - "learning_rate": 9.903614451303335e-06, - "loss": 0.7423, + "epoch": 0.06, + "grad_norm": 1.0146787174815457, + "learning_rate": 9.974455260677353e-06, + "loss": 0.4821, "step": 863 }, { - "epoch": 0.09, - "grad_norm": 2.673446796533317, - "learning_rate": 9.903281151992097e-06, - "loss": 0.684, + "epoch": 0.06, + "grad_norm": 3.019263994741795, + "learning_rate": 9.974339115813031e-06, + "loss": 0.5763, "step": 864 }, { - "epoch": 0.09, - "grad_norm": 2.6240021544906904, - "learning_rate": 9.902947283031833e-06, - "loss": 0.6573, + "epoch": 0.06, + "grad_norm": 2.172402342565474, + "learning_rate": 9.974222708187263e-06, + "loss": 0.608, "step": 865 }, { - "epoch": 0.09, - "grad_norm": 2.4194535688154097, - "learning_rate": 9.902612844461322e-06, - "loss": 0.7671, + "epoch": 0.06, + "grad_norm": 1.8419787566644, + "learning_rate": 9.974106037806195e-06, + "loss": 0.5085, "step": 866 }, { - "epoch": 0.09, - "grad_norm": 2.600458794788045, - "learning_rate": 9.902277836319424e-06, - "loss": 0.686, + "epoch": 0.06, + "grad_norm": 1.9829903508564541, + "learning_rate": 9.97398910467599e-06, + "loss": 0.5915, "step": 867 }, { - "epoch": 0.09, - "grad_norm": 2.7708332752714226, - "learning_rate": 9.90194225864506e-06, - "loss": 0.7713, + "epoch": 0.06, + "grad_norm": 2.083601631108778, + "learning_rate": 9.973871908802828e-06, + "loss": 0.6569, "step": 868 }, { - "epoch": 0.09, - "grad_norm": 2.345954440796902, - "learning_rate": 9.901606111477213e-06, - "loss": 0.7127, + "epoch": 0.06, + "grad_norm": 1.8609155361391914, + "learning_rate": 9.973754450192899e-06, + "loss": 0.5951, "step": 869 }, { - "epoch": 0.09, - "grad_norm": 2.4380277637214958, - "learning_rate": 9.901269394854938e-06, - "loss": 0.7432, + "epoch": 0.06, + "grad_norm": 1.922397781312075, + "learning_rate": 9.973636728852406e-06, + "loss": 0.6317, "step": 870 }, { - "epoch": 0.09, - "grad_norm": 2.535510929005561, - "learning_rate": 9.900932108817352e-06, - "loss": 0.735, + "epoch": 0.06, + "grad_norm": 2.0937765887126445, + "learning_rate": 9.973518744787564e-06, + "loss": 0.5528, "step": 871 }, { - "epoch": 0.09, - "grad_norm": 3.0039289042478337, - "learning_rate": 9.900594253403642e-06, - "loss": 0.76, + "epoch": 0.06, + "grad_norm": 0.9307070356280323, + "learning_rate": 9.973400498004614e-06, + "loss": 0.4379, "step": 872 }, { - "epoch": 0.09, - "grad_norm": 2.350300158880335, - "learning_rate": 9.900255828653057e-06, - "loss": 0.6084, + "epoch": 0.06, + "grad_norm": 1.9708208473918585, + "learning_rate": 9.973281988509797e-06, + "loss": 0.6242, "step": 873 }, { - "epoch": 0.09, - "grad_norm": 2.4958117500634938, - "learning_rate": 9.899916834604914e-06, - "loss": 0.7951, + "epoch": 0.06, + "grad_norm": 2.1716549774775755, + "learning_rate": 9.973163216309372e-06, + "loss": 0.575, "step": 874 }, { - "epoch": 0.09, - "grad_norm": 2.4483497475118843, - "learning_rate": 9.899577271298596e-06, - "loss": 0.7217, + "epoch": 0.06, + "grad_norm": 1.9834120094353342, + "learning_rate": 9.973044181409615e-06, + "loss": 0.6476, "step": 875 }, { - "epoch": 0.09, - "grad_norm": 1.9481072329531595, - "learning_rate": 9.89923713877356e-06, - "loss": 0.6245, + "epoch": 0.06, + "grad_norm": 1.881757462925116, + "learning_rate": 9.972924883816813e-06, + "loss": 0.6009, "step": 876 }, { - "epoch": 0.09, - "grad_norm": 2.765051932549656, - "learning_rate": 9.89889643706931e-06, - "loss": 0.6723, + "epoch": 0.06, + "grad_norm": 2.147014728035417, + "learning_rate": 9.97280532353727e-06, + "loss": 0.5882, "step": 877 }, { - "epoch": 0.09, - "grad_norm": 2.739221117936455, - "learning_rate": 9.898555166225434e-06, - "loss": 0.7703, + "epoch": 0.06, + "grad_norm": 2.1724960317691444, + "learning_rate": 9.972685500577298e-06, + "loss": 0.5682, "step": 878 }, { - "epoch": 0.09, - "grad_norm": 2.1418498868809768, - "learning_rate": 9.89821332628158e-06, - "loss": 0.7298, + "epoch": 0.06, + "grad_norm": 4.282556217063045, + "learning_rate": 9.972565414943227e-06, + "loss": 0.5298, "step": 879 }, { - "epoch": 0.09, - "grad_norm": 2.2036351653598367, - "learning_rate": 9.897870917277461e-06, - "loss": 0.8093, + "epoch": 0.06, + "grad_norm": 2.105894236389261, + "learning_rate": 9.972445066641402e-06, + "loss": 0.585, "step": 880 }, { - "epoch": 0.09, - "grad_norm": 2.4323072705596505, - "learning_rate": 9.897527939252858e-06, - "loss": 0.7399, + "epoch": 0.06, + "grad_norm": 1.6996022889408693, + "learning_rate": 9.972324455678183e-06, + "loss": 0.602, "step": 881 }, { - "epoch": 0.09, - "grad_norm": 2.8657728288025477, - "learning_rate": 9.897184392247614e-06, - "loss": 0.7356, + "epoch": 0.06, + "grad_norm": 1.814945086356654, + "learning_rate": 9.972203582059934e-06, + "loss": 0.6793, "step": 882 }, { - "epoch": 0.09, - "grad_norm": 2.5414139234529642, - "learning_rate": 9.896840276301645e-06, - "loss": 0.6747, + "epoch": 0.06, + "grad_norm": 1.905743097274066, + "learning_rate": 9.972082445793045e-06, + "loss": 0.5951, "step": 883 }, { - "epoch": 0.09, - "grad_norm": 2.2994147096204327, - "learning_rate": 9.896495591454929e-06, - "loss": 0.7335, + "epoch": 0.06, + "grad_norm": 2.984656564542831, + "learning_rate": 9.971961046883913e-06, + "loss": 0.5825, "step": 884 }, { - "epoch": 0.09, - "grad_norm": 1.9851136351096752, - "learning_rate": 9.896150337747508e-06, - "loss": 0.7839, + "epoch": 0.06, + "grad_norm": 0.9527634299624462, + "learning_rate": 9.971839385338952e-06, + "loss": 0.4644, "step": 885 }, { - "epoch": 0.09, - "grad_norm": 2.475304204640165, - "learning_rate": 9.895804515219495e-06, - "loss": 0.7442, + "epoch": 0.06, + "grad_norm": 1.710910228263264, + "learning_rate": 9.971717461164587e-06, + "loss": 0.6145, "step": 886 }, { - "epoch": 0.09, - "grad_norm": 3.0051133893066178, - "learning_rate": 9.895458123911066e-06, - "loss": 0.819, + "epoch": 0.06, + "grad_norm": 1.9693978242875008, + "learning_rate": 9.97159527436726e-06, + "loss": 0.5932, "step": 887 }, { - "epoch": 0.09, - "grad_norm": 3.01882317139601, - "learning_rate": 9.895111163862464e-06, - "loss": 0.743, + "epoch": 0.06, + "grad_norm": 1.5650670846721049, + "learning_rate": 9.971472824953424e-06, + "loss": 0.5868, "step": 888 }, { - "epoch": 0.09, - "grad_norm": 1.4869064278045154, - "learning_rate": 9.894763635113995e-06, - "loss": 0.673, + "epoch": 0.06, + "grad_norm": 5.887828219587848, + "learning_rate": 9.971350112929547e-06, + "loss": 0.593, "step": 889 }, { - "epoch": 0.09, - "grad_norm": 2.0423193152498316, - "learning_rate": 9.894415537706036e-06, - "loss": 0.7938, + "epoch": 0.06, + "grad_norm": 2.4023258415443984, + "learning_rate": 9.971227138302112e-06, + "loss": 0.589, "step": 890 }, { - "epoch": 0.09, - "grad_norm": 2.3877929769561415, - "learning_rate": 9.89406687167903e-06, - "loss": 0.8399, + "epoch": 0.06, + "grad_norm": 5.939024582463684, + "learning_rate": 9.971103901077617e-06, + "loss": 0.5792, "step": 891 }, { - "epoch": 0.09, - "grad_norm": 2.3118433378041283, - "learning_rate": 9.893717637073483e-06, - "loss": 0.7365, + "epoch": 0.06, + "grad_norm": 2.0785454618638526, + "learning_rate": 9.970980401262567e-06, + "loss": 0.5687, "step": 892 }, { - "epoch": 0.09, - "grad_norm": 3.1231159883183817, - "learning_rate": 9.893367833929965e-06, - "loss": 0.6975, + "epoch": 0.06, + "grad_norm": 2.0946012688564672, + "learning_rate": 9.97085663886349e-06, + "loss": 0.6617, "step": 893 }, { - "epoch": 0.09, - "grad_norm": 2.946732175793273, - "learning_rate": 9.893017462289119e-06, - "loss": 0.7355, + "epoch": 0.06, + "grad_norm": 1.752209723044127, + "learning_rate": 9.97073261388692e-06, + "loss": 0.6015, "step": 894 }, { - "epoch": 0.09, - "grad_norm": 4.3717939300188275, - "learning_rate": 9.892666522191648e-06, - "loss": 0.6766, + "epoch": 0.06, + "grad_norm": 1.9766817219219843, + "learning_rate": 9.970608326339412e-06, + "loss": 0.6951, "step": 895 }, { - "epoch": 0.09, - "grad_norm": 3.0279846422346446, - "learning_rate": 9.892315013678323e-06, - "loss": 0.7597, + "epoch": 0.06, + "grad_norm": 1.9825613559009032, + "learning_rate": 9.97048377622753e-06, + "loss": 0.5272, "step": 896 }, { - "epoch": 0.09, - "grad_norm": 2.55744056624668, - "learning_rate": 9.891962936789983e-06, - "loss": 0.7977, + "epoch": 0.06, + "grad_norm": 8.720347274979087, + "learning_rate": 9.970358963557849e-06, + "loss": 0.6007, "step": 897 }, { - "epoch": 0.09, - "grad_norm": 2.569713950624268, - "learning_rate": 9.891610291567529e-06, - "loss": 0.7722, + "epoch": 0.06, + "grad_norm": 1.7666897572156686, + "learning_rate": 9.970233888336969e-06, + "loss": 0.6003, "step": 898 }, { - "epoch": 0.09, - "grad_norm": 2.8251737732812003, - "learning_rate": 9.891257078051932e-06, - "loss": 0.8538, + "epoch": 0.06, + "grad_norm": 1.823990328529463, + "learning_rate": 9.970108550571492e-06, + "loss": 0.6949, "step": 899 }, { - "epoch": 0.09, - "grad_norm": 3.940064605870108, - "learning_rate": 9.890903296284228e-06, - "loss": 0.6861, + "epoch": 0.06, + "grad_norm": 2.0460248580961506, + "learning_rate": 9.96998295026804e-06, + "loss": 0.6285, "step": 900 }, { - "epoch": 0.09, - "grad_norm": 2.3962938305889727, - "learning_rate": 9.890548946305516e-06, - "loss": 0.7367, + "epoch": 0.06, + "grad_norm": 2.1444350798914584, + "learning_rate": 9.96985708743325e-06, + "loss": 0.5693, "step": 901 }, { - "epoch": 0.09, - "grad_norm": 2.6270790503951877, - "learning_rate": 9.890194028156965e-06, - "loss": 0.6927, + "epoch": 0.06, + "grad_norm": 1.9432446655007625, + "learning_rate": 9.969730962073766e-06, + "loss": 0.5978, "step": 902 }, { - "epoch": 0.1, - "grad_norm": 2.2464932261338486, - "learning_rate": 9.889838541879808e-06, - "loss": 0.6671, + "epoch": 0.06, + "grad_norm": 2.407018149499689, + "learning_rate": 9.969604574196255e-06, + "loss": 0.6224, "step": 903 }, { - "epoch": 0.1, - "grad_norm": 2.2068048828956184, - "learning_rate": 9.889482487515344e-06, - "loss": 0.6574, + "epoch": 0.06, + "grad_norm": 1.0037136435872804, + "learning_rate": 9.96947792380739e-06, + "loss": 0.4598, "step": 904 }, { - "epoch": 0.1, - "grad_norm": 4.131286705587521, - "learning_rate": 9.889125865104939e-06, - "loss": 0.6844, + "epoch": 0.06, + "grad_norm": 1.8326036091806832, + "learning_rate": 9.969351010913863e-06, + "loss": 0.7125, "step": 905 }, { - "epoch": 0.1, - "grad_norm": 2.4854115277088384, - "learning_rate": 9.888768674690023e-06, - "loss": 0.7562, + "epoch": 0.06, + "grad_norm": 1.830851819927731, + "learning_rate": 9.969223835522377e-06, + "loss": 0.6518, "step": 906 }, { - "epoch": 0.1, - "grad_norm": 2.430105603029439, - "learning_rate": 9.888410916312096e-06, - "loss": 0.8826, + "epoch": 0.06, + "grad_norm": 1.7779405420608956, + "learning_rate": 9.969096397639647e-06, + "loss": 0.6539, "step": 907 }, { - "epoch": 0.1, - "grad_norm": 2.3307764886099447, - "learning_rate": 9.888052590012719e-06, - "loss": 0.6994, + "epoch": 0.06, + "grad_norm": 2.153927525701028, + "learning_rate": 9.968968697272413e-06, + "loss": 0.5624, "step": 908 }, { - "epoch": 0.1, - "grad_norm": 2.399232411769506, - "learning_rate": 9.887693695833522e-06, - "loss": 0.7844, + "epoch": 0.06, + "grad_norm": 0.9874270094416919, + "learning_rate": 9.968840734427411e-06, + "loss": 0.457, "step": 909 }, { - "epoch": 0.1, - "grad_norm": 2.4841741299380127, - "learning_rate": 9.887334233816199e-06, - "loss": 0.7191, + "epoch": 0.06, + "grad_norm": 2.006708477860132, + "learning_rate": 9.968712509111405e-06, + "loss": 0.5349, "step": 910 }, { - "epoch": 0.1, - "grad_norm": 2.231644133692005, - "learning_rate": 9.886974204002514e-06, - "loss": 0.7236, + "epoch": 0.06, + "grad_norm": 1.6891536424160458, + "learning_rate": 9.968584021331169e-06, + "loss": 0.6229, "step": 911 }, { - "epoch": 0.1, - "grad_norm": 2.589193939890633, - "learning_rate": 9.886613606434294e-06, - "loss": 0.8006, + "epoch": 0.06, + "grad_norm": 2.2400003935522346, + "learning_rate": 9.96845527109349e-06, + "loss": 0.595, "step": 912 }, { - "epoch": 0.1, - "grad_norm": 2.8815065090619294, - "learning_rate": 9.886252441153428e-06, - "loss": 0.762, + "epoch": 0.06, + "grad_norm": 1.8197738409863462, + "learning_rate": 9.968326258405168e-06, + "loss": 0.5783, "step": 913 }, { - "epoch": 0.1, - "grad_norm": 2.458751138818673, - "learning_rate": 9.885890708201881e-06, - "loss": 0.748, + "epoch": 0.06, + "grad_norm": 1.7945323729979659, + "learning_rate": 9.968196983273018e-06, + "loss": 0.5785, "step": 914 }, { - "epoch": 0.1, - "grad_norm": 2.729352069908257, - "learning_rate": 9.885528407621674e-06, - "loss": 0.7319, + "epoch": 0.06, + "grad_norm": 0.8985331650990965, + "learning_rate": 9.968067445703866e-06, + "loss": 0.4501, "step": 915 }, { - "epoch": 0.1, - "grad_norm": 2.7456542817248035, - "learning_rate": 9.885165539454898e-06, - "loss": 0.7691, + "epoch": 0.07, + "grad_norm": 2.2284875187402653, + "learning_rate": 9.96793764570456e-06, + "loss": 0.5747, "step": 916 }, { - "epoch": 0.1, - "grad_norm": 2.821954609815299, - "learning_rate": 9.884802103743712e-06, - "loss": 0.724, + "epoch": 0.07, + "grad_norm": 2.7289751817573573, + "learning_rate": 9.967807583281955e-06, + "loss": 0.6883, "step": 917 }, { - "epoch": 0.1, - "grad_norm": 2.507495458671125, - "learning_rate": 9.88443810053034e-06, - "loss": 0.7557, + "epoch": 0.07, + "grad_norm": 0.9617652798852775, + "learning_rate": 9.967677258442918e-06, + "loss": 0.4677, "step": 918 }, { - "epoch": 0.1, - "grad_norm": 2.813557690867528, - "learning_rate": 9.884073529857066e-06, - "loss": 0.7378, + "epoch": 0.07, + "grad_norm": 1.869470988391731, + "learning_rate": 9.967546671194335e-06, + "loss": 0.576, "step": 919 }, { - "epoch": 0.1, - "grad_norm": 1.2109392193308994, - "learning_rate": 9.883708391766248e-06, - "loss": 0.655, + "epoch": 0.07, + "grad_norm": 1.9119344849532227, + "learning_rate": 9.967415821543107e-06, + "loss": 0.5712, "step": 920 }, { - "epoch": 0.1, - "grad_norm": 2.7797266133676817, - "learning_rate": 9.88334268630031e-06, - "loss": 0.7568, + "epoch": 0.07, + "grad_norm": 2.911043116077983, + "learning_rate": 9.967284709496142e-06, + "loss": 0.6001, "step": 921 }, { - "epoch": 0.1, - "grad_norm": 2.291991455347209, - "learning_rate": 9.882976413501733e-06, - "loss": 0.6939, + "epoch": 0.07, + "grad_norm": 1.7565667783594785, + "learning_rate": 9.967153335060367e-06, + "loss": 0.6843, "step": 922 }, { - "epoch": 0.1, - "grad_norm": 2.2553429818506694, - "learning_rate": 9.88260957341307e-06, - "loss": 0.8572, + "epoch": 0.07, + "grad_norm": 3.232203497547063, + "learning_rate": 9.96702169824272e-06, + "loss": 0.6291, "step": 923 }, { - "epoch": 0.1, - "grad_norm": 2.944662889076845, - "learning_rate": 9.882242166076942e-06, - "loss": 0.7171, + "epoch": 0.07, + "grad_norm": 1.9603634692549758, + "learning_rate": 9.96688979905016e-06, + "loss": 0.5934, "step": 924 }, { - "epoch": 0.1, - "grad_norm": 2.6005068384270817, - "learning_rate": 9.881874191536032e-06, - "loss": 0.7304, + "epoch": 0.07, + "grad_norm": 1.6212481980922946, + "learning_rate": 9.966757637489647e-06, + "loss": 0.6018, "step": 925 }, { - "epoch": 0.1, - "grad_norm": 2.466317488664055, - "learning_rate": 9.881505649833091e-06, - "loss": 0.7116, + "epoch": 0.07, + "grad_norm": 2.1877457261719644, + "learning_rate": 9.966625213568169e-06, + "loss": 0.5791, "step": 926 }, { - "epoch": 0.1, - "grad_norm": 2.4452143875236243, - "learning_rate": 9.881136541010934e-06, - "loss": 0.7629, + "epoch": 0.07, + "grad_norm": 1.7952240870885958, + "learning_rate": 9.966492527292718e-06, + "loss": 0.6318, "step": 927 }, { - "epoch": 0.1, - "grad_norm": 2.5102243570459595, - "learning_rate": 9.880766865112444e-06, - "loss": 0.7286, + "epoch": 0.07, + "grad_norm": 0.9023042009864433, + "learning_rate": 9.966359578670302e-06, + "loss": 0.4718, "step": 928 }, { - "epoch": 0.1, - "grad_norm": 2.7791143392672875, - "learning_rate": 9.880396622180567e-06, - "loss": 0.7923, + "epoch": 0.07, + "grad_norm": 1.886150086637256, + "learning_rate": 9.966226367707943e-06, + "loss": 0.5645, "step": 929 }, { - "epoch": 0.1, - "grad_norm": 2.876441285426497, - "learning_rate": 9.880025812258322e-06, - "loss": 0.7381, + "epoch": 0.07, + "grad_norm": 1.8619355750875033, + "learning_rate": 9.966092894412683e-06, + "loss": 0.6124, "step": 930 }, { - "epoch": 0.1, - "grad_norm": 3.2830140025930246, - "learning_rate": 9.879654435388781e-06, - "loss": 0.7657, + "epoch": 0.07, + "grad_norm": 2.3138410422118505, + "learning_rate": 9.965959158791564e-06, + "loss": 0.6055, "step": 931 }, { - "epoch": 0.1, - "grad_norm": 2.1392167947242817, - "learning_rate": 9.879282491615096e-06, - "loss": 0.7584, + "epoch": 0.07, + "grad_norm": 1.717862802490277, + "learning_rate": 9.96582516085166e-06, + "loss": 0.6625, "step": 932 }, { - "epoch": 0.1, - "grad_norm": 2.5574554782173244, - "learning_rate": 9.878909980980475e-06, - "loss": 0.6736, + "epoch": 0.07, + "grad_norm": 0.8584968622231486, + "learning_rate": 9.965690900600042e-06, + "loss": 0.4908, "step": 933 }, { - "epoch": 0.1, - "grad_norm": 2.482015182428965, - "learning_rate": 9.878536903528195e-06, - "loss": 0.7855, + "epoch": 0.07, + "grad_norm": 1.909652169596075, + "learning_rate": 9.965556378043806e-06, + "loss": 0.6712, "step": 934 }, { - "epoch": 0.1, - "grad_norm": 2.20969188023806, - "learning_rate": 9.8781632593016e-06, - "loss": 0.7142, + "epoch": 0.07, + "grad_norm": 1.6739853923745158, + "learning_rate": 9.965421593190055e-06, + "loss": 0.6092, "step": 935 }, { - "epoch": 0.1, - "grad_norm": 2.6096755265839593, - "learning_rate": 9.8777890483441e-06, - "loss": 0.8059, + "epoch": 0.07, + "grad_norm": 3.0279753874320314, + "learning_rate": 9.965286546045911e-06, + "loss": 0.5493, "step": 936 }, { - "epoch": 0.1, - "grad_norm": 2.6014399338081122, - "learning_rate": 9.877414270699168e-06, - "loss": 0.7378, + "epoch": 0.07, + "grad_norm": 1.979034693393833, + "learning_rate": 9.965151236618506e-06, + "loss": 0.6632, "step": 937 }, { - "epoch": 0.1, - "grad_norm": 2.1756243191451383, - "learning_rate": 9.877038926410346e-06, - "loss": 0.7268, + "epoch": 0.07, + "grad_norm": 0.9898820613403267, + "learning_rate": 9.96501566491499e-06, + "loss": 0.4634, "step": 938 }, { - "epoch": 0.1, - "grad_norm": 2.196316926201555, - "learning_rate": 9.876663015521237e-06, - "loss": 0.8158, + "epoch": 0.07, + "grad_norm": 1.8261873933090123, + "learning_rate": 9.964879830942522e-06, + "loss": 0.6541, "step": 939 }, { - "epoch": 0.1, - "grad_norm": 2.383983584002673, - "learning_rate": 9.876286538075519e-06, - "loss": 0.7215, + "epoch": 0.07, + "grad_norm": 2.0686272120013807, + "learning_rate": 9.96474373470828e-06, + "loss": 0.5171, "step": 940 }, { - "epoch": 0.1, - "grad_norm": 2.7085992373351204, - "learning_rate": 9.875909494116925e-06, - "loss": 0.6575, + "epoch": 0.07, + "grad_norm": 1.6688511068608667, + "learning_rate": 9.964607376219448e-06, + "loss": 0.5622, "step": 941 }, { - "epoch": 0.1, - "grad_norm": 2.5130773928374737, - "learning_rate": 9.875531883689262e-06, - "loss": 0.6911, + "epoch": 0.07, + "grad_norm": 1.871077513019353, + "learning_rate": 9.964470755483233e-06, + "loss": 0.6285, "step": 942 }, { - "epoch": 0.1, - "grad_norm": 3.318178727992876, - "learning_rate": 9.875153706836397e-06, - "loss": 0.788, + "epoch": 0.07, + "grad_norm": 2.052486629540008, + "learning_rate": 9.964333872506852e-06, + "loss": 0.6266, "step": 943 }, { - "epoch": 0.1, - "grad_norm": 4.286494331288682, - "learning_rate": 9.874774963602268e-06, - "loss": 0.7114, + "epoch": 0.07, + "grad_norm": 1.786169561760334, + "learning_rate": 9.964196727297533e-06, + "loss": 0.6136, "step": 944 }, { - "epoch": 0.1, - "grad_norm": 2.1591477947522186, - "learning_rate": 9.874395654030876e-06, - "loss": 0.7871, + "epoch": 0.07, + "grad_norm": 0.9488176618114591, + "learning_rate": 9.964059319862522e-06, + "loss": 0.4556, "step": 945 }, { - "epoch": 0.1, - "grad_norm": 2.1547218552212177, - "learning_rate": 9.874015778166285e-06, - "loss": 0.8348, + "epoch": 0.07, + "grad_norm": 1.713882689591305, + "learning_rate": 9.963921650209079e-06, + "loss": 0.5916, "step": 946 }, { - "epoch": 0.1, - "grad_norm": 2.762373245266379, - "learning_rate": 9.873635336052633e-06, - "loss": 0.7688, + "epoch": 0.07, + "grad_norm": 8.156210591485637, + "learning_rate": 9.963783718344471e-06, + "loss": 0.5972, "step": 947 }, { - "epoch": 0.1, - "grad_norm": 2.309708677231106, - "learning_rate": 9.873254327734115e-06, - "loss": 0.747, + "epoch": 0.07, + "grad_norm": 1.6735671467686146, + "learning_rate": 9.963645524275991e-06, + "loss": 0.584, "step": 948 }, { - "epoch": 0.1, - "grad_norm": 2.053888930700921, - "learning_rate": 9.872872753254996e-06, - "loss": 0.7223, + "epoch": 0.07, + "grad_norm": 2.276830798852272, + "learning_rate": 9.963507068010933e-06, + "loss": 0.6418, "step": 949 }, { - "epoch": 0.1, - "grad_norm": 3.5008788542760363, - "learning_rate": 9.872490612659607e-06, - "loss": 0.6766, + "epoch": 0.07, + "grad_norm": 2.3441157082821746, + "learning_rate": 9.963368349556614e-06, + "loss": 0.5468, "step": 950 }, { - "epoch": 0.1, - "grad_norm": 2.3863679851555673, - "learning_rate": 9.872107905992343e-06, - "loss": 0.6972, + "epoch": 0.07, + "grad_norm": 2.342859980279433, + "learning_rate": 9.96322936892036e-06, + "loss": 0.6946, "step": 951 }, { - "epoch": 0.1, - "grad_norm": 2.921615156601913, - "learning_rate": 9.871724633297666e-06, - "loss": 0.6999, + "epoch": 0.07, + "grad_norm": 1.700710644353618, + "learning_rate": 9.963090126109513e-06, + "loss": 0.5941, "step": 952 }, { - "epoch": 0.1, - "grad_norm": 3.2644813929698615, - "learning_rate": 9.871340794620103e-06, - "loss": 0.761, + "epoch": 0.07, + "grad_norm": 1.9769494657080284, + "learning_rate": 9.962950621131428e-06, + "loss": 0.5949, "step": 953 }, { - "epoch": 0.1, - "grad_norm": 2.1191106722345814, - "learning_rate": 9.87095639000425e-06, - "loss": 0.7376, + "epoch": 0.07, + "grad_norm": 2.876909040872116, + "learning_rate": 9.962810853993476e-06, + "loss": 0.5814, "step": 954 }, { - "epoch": 0.1, - "grad_norm": 6.026837083096435, - "learning_rate": 9.870571419494764e-06, - "loss": 0.7222, + "epoch": 0.07, + "grad_norm": 1.6465083760938293, + "learning_rate": 9.962670824703036e-06, + "loss": 0.551, "step": 955 }, { - "epoch": 0.1, - "grad_norm": 2.336308324288583, - "learning_rate": 9.87018588313637e-06, - "loss": 0.7229, + "epoch": 0.07, + "grad_norm": 2.1420842510858478, + "learning_rate": 9.962530533267509e-06, + "loss": 0.6299, "step": 956 }, { - "epoch": 0.1, - "grad_norm": 2.456262394309276, - "learning_rate": 9.869799780973856e-06, - "loss": 0.7182, + "epoch": 0.07, + "grad_norm": 2.0857649474567412, + "learning_rate": 9.962389979694305e-06, + "loss": 0.5453, "step": 957 }, { - "epoch": 0.1, - "grad_norm": 2.5276315796370676, - "learning_rate": 9.869413113052084e-06, - "loss": 0.6835, + "epoch": 0.07, + "grad_norm": 2.0686946954752052, + "learning_rate": 9.962249163990845e-06, + "loss": 0.5879, "step": 958 }, { - "epoch": 0.1, - "grad_norm": 3.1051858480552497, - "learning_rate": 9.86902587941597e-06, - "loss": 0.7101, + "epoch": 0.07, + "grad_norm": 2.2381282580812614, + "learning_rate": 9.96210808616457e-06, + "loss": 0.5538, "step": 959 }, { - "epoch": 0.1, - "grad_norm": 3.3731070604108844, - "learning_rate": 9.868638080110507e-06, - "loss": 0.7221, + "epoch": 0.07, + "grad_norm": 1.8980944721668438, + "learning_rate": 9.961966746222932e-06, + "loss": 0.565, "step": 960 }, { - "epoch": 0.1, - "grad_norm": 2.0867518495145223, - "learning_rate": 9.868249715180741e-06, - "loss": 0.7046, + "epoch": 0.07, + "grad_norm": 1.1146849728949642, + "learning_rate": 9.961825144173398e-06, + "loss": 0.4633, "step": 961 }, { - "epoch": 0.1, - "grad_norm": 2.0764983780128166, - "learning_rate": 9.8678607846718e-06, - "loss": 0.7498, + "epoch": 0.07, + "grad_norm": 1.930090766725209, + "learning_rate": 9.961683280023446e-06, + "loss": 0.5319, "step": 962 }, { - "epoch": 0.1, - "grad_norm": 2.1440243110852037, - "learning_rate": 9.867471288628863e-06, - "loss": 0.6442, + "epoch": 0.07, + "grad_norm": 1.9261510435773344, + "learning_rate": 9.961541153780571e-06, + "loss": 0.5924, "step": 963 }, { - "epoch": 0.1, - "grad_norm": 2.559996501289641, - "learning_rate": 9.867081227097182e-06, - "loss": 0.7556, + "epoch": 0.07, + "grad_norm": 2.696804409059998, + "learning_rate": 9.961398765452282e-06, + "loss": 0.5689, "step": 964 }, { - "epoch": 0.1, - "grad_norm": 2.546829215817774, - "learning_rate": 9.866690600122075e-06, - "loss": 0.7098, + "epoch": 0.07, + "grad_norm": 1.667347481743653, + "learning_rate": 9.961256115046096e-06, + "loss": 0.5659, "step": 965 }, { - "epoch": 0.1, - "grad_norm": 2.692415588752108, - "learning_rate": 9.866299407748921e-06, - "loss": 0.694, + "epoch": 0.07, + "grad_norm": 1.7467911315034046, + "learning_rate": 9.961113202569553e-06, + "loss": 0.6208, "step": 966 }, { - "epoch": 0.1, - "grad_norm": 2.336011704071477, - "learning_rate": 9.865907650023167e-06, - "loss": 0.7751, + "epoch": 0.07, + "grad_norm": 1.9166040700094245, + "learning_rate": 9.960970028030199e-06, + "loss": 0.5923, "step": 967 }, { - "epoch": 0.1, - "grad_norm": 2.3030234604337436, - "learning_rate": 9.865515326990332e-06, - "loss": 0.68, + "epoch": 0.07, + "grad_norm": 1.631080649964886, + "learning_rate": 9.960826591435598e-06, + "loss": 0.5493, "step": 968 }, { - "epoch": 0.1, - "grad_norm": 2.3788631634376527, - "learning_rate": 9.865122438695988e-06, - "loss": 0.7517, + "epoch": 0.07, + "grad_norm": 7.753504265636464, + "learning_rate": 9.960682892793328e-06, + "loss": 0.6408, "step": 969 }, { - "epoch": 0.1, - "grad_norm": 2.594025015808721, - "learning_rate": 9.864728985185783e-06, - "loss": 0.7859, + "epoch": 0.07, + "grad_norm": 1.9917995419167682, + "learning_rate": 9.960538932110977e-06, + "loss": 0.6085, "step": 970 }, { - "epoch": 0.1, - "grad_norm": 2.2839322712235326, - "learning_rate": 9.86433496650543e-06, - "loss": 0.7123, + "epoch": 0.07, + "grad_norm": 1.5972018439183042, + "learning_rate": 9.960394709396152e-06, + "loss": 0.5051, "step": 971 }, { - "epoch": 0.1, - "grad_norm": 2.7189304806754397, - "learning_rate": 9.863940382700699e-06, - "loss": 0.7495, + "epoch": 0.07, + "grad_norm": 1.9839178919532483, + "learning_rate": 9.960250224656468e-06, + "loss": 0.7072, "step": 972 }, { - "epoch": 0.1, - "grad_norm": 2.5542757749406033, - "learning_rate": 9.863545233817436e-06, - "loss": 0.7249, + "epoch": 0.07, + "grad_norm": 1.7956244745958974, + "learning_rate": 9.960105477899565e-06, + "loss": 0.6235, "step": 973 }, { - "epoch": 0.1, - "grad_norm": 2.2818240056053565, - "learning_rate": 9.863149519901545e-06, - "loss": 0.7263, + "epoch": 0.07, + "grad_norm": 1.9516011221943101, + "learning_rate": 9.959960469133079e-06, + "loss": 0.5534, "step": 974 }, { - "epoch": 0.1, - "grad_norm": 5.980498735075583, - "learning_rate": 9.862753240999001e-06, - "loss": 0.796, + "epoch": 0.07, + "grad_norm": 2.2355846997661404, + "learning_rate": 9.959815198364676e-06, + "loss": 0.5872, "step": 975 }, { - "epoch": 0.1, - "grad_norm": 2.17696941282096, - "learning_rate": 9.862356397155843e-06, - "loss": 0.7528, + "epoch": 0.07, + "grad_norm": 1.9017082237600156, + "learning_rate": 9.959669665602026e-06, + "loss": 0.557, "step": 976 }, { - "epoch": 0.1, - "grad_norm": 2.7279007297671747, - "learning_rate": 9.861958988418174e-06, - "loss": 0.7417, + "epoch": 0.07, + "grad_norm": 1.8077601025295578, + "learning_rate": 9.959523870852821e-06, + "loss": 0.6623, "step": 977 }, { - "epoch": 0.1, - "grad_norm": 2.7775418932594853, - "learning_rate": 9.861561014832166e-06, - "loss": 0.6685, + "epoch": 0.07, + "grad_norm": 1.7877833045022467, + "learning_rate": 9.959377814124759e-06, + "loss": 0.5579, "step": 978 }, { - "epoch": 0.1, - "grad_norm": 3.0608876144824255, - "learning_rate": 9.86116247644405e-06, - "loss": 0.796, + "epoch": 0.07, + "grad_norm": 2.1833227795127934, + "learning_rate": 9.959231495425557e-06, + "loss": 0.6484, "step": 979 }, { - "epoch": 0.1, - "grad_norm": 2.230200096432408, - "learning_rate": 9.860763373300133e-06, - "loss": 0.682, + "epoch": 0.07, + "grad_norm": 1.9092953043499936, + "learning_rate": 9.959084914762942e-06, + "loss": 0.6368, "step": 980 }, { - "epoch": 0.1, - "grad_norm": 2.583892124389053, - "learning_rate": 9.860363705446776e-06, - "loss": 0.7154, + "epoch": 0.07, + "grad_norm": 1.8781677243250605, + "learning_rate": 9.958938072144659e-06, + "loss": 0.5918, "step": 981 }, { - "epoch": 0.1, - "grad_norm": 3.420436801225999, - "learning_rate": 9.859963472930413e-06, - "loss": 0.6849, + "epoch": 0.07, + "grad_norm": 1.6665327415494193, + "learning_rate": 9.958790967578463e-06, + "loss": 0.5418, "step": 982 }, { - "epoch": 0.1, - "grad_norm": 2.5498244013352545, - "learning_rate": 9.859562675797543e-06, - "loss": 0.8011, + "epoch": 0.07, + "grad_norm": 1.8307992478320423, + "learning_rate": 9.958643601072126e-06, + "loss": 0.553, "step": 983 }, { - "epoch": 0.1, - "grad_norm": 2.1659421053037153, - "learning_rate": 9.85916131409473e-06, - "loss": 0.7394, + "epoch": 0.07, + "grad_norm": 1.9220979981484367, + "learning_rate": 9.958495972633431e-06, + "loss": 0.5941, "step": 984 }, { - "epoch": 0.1, - "grad_norm": 3.338196115649606, - "learning_rate": 9.858759387868601e-06, - "loss": 0.7245, + "epoch": 0.07, + "grad_norm": 1.9457239907818042, + "learning_rate": 9.958348082270178e-06, + "loss": 0.6743, "step": 985 }, { - "epoch": 0.1, - "grad_norm": 2.7321452634255357, - "learning_rate": 9.858356897165853e-06, - "loss": 0.7919, + "epoch": 0.07, + "grad_norm": 2.764260761611482, + "learning_rate": 9.958199929990178e-06, + "loss": 0.6568, "step": 986 }, { - "epoch": 0.1, - "grad_norm": 2.6253699652801417, - "learning_rate": 9.857953842033243e-06, - "loss": 0.733, + "epoch": 0.07, + "grad_norm": 1.7856321916081361, + "learning_rate": 9.958051515801257e-06, + "loss": 0.6022, "step": 987 }, { - "epoch": 0.1, - "grad_norm": 2.59635003190081, - "learning_rate": 9.857550222517598e-06, - "loss": 0.7092, + "epoch": 0.07, + "grad_norm": 1.8732212657590062, + "learning_rate": 9.957902839711254e-06, + "loss": 0.6485, "step": 988 }, { - "epoch": 0.1, - "grad_norm": 2.621851871749562, - "learning_rate": 9.857146038665812e-06, - "loss": 0.7042, + "epoch": 0.07, + "grad_norm": 1.8838482369742178, + "learning_rate": 9.957753901728024e-06, + "loss": 0.5916, "step": 989 }, { - "epoch": 0.1, - "grad_norm": 2.7774812468209973, - "learning_rate": 9.856741290524839e-06, - "loss": 0.7218, + "epoch": 0.07, + "grad_norm": 2.4272359058088626, + "learning_rate": 9.957604701859434e-06, + "loss": 0.6082, "step": 990 }, { - "epoch": 0.1, - "grad_norm": 2.9702400566443012, - "learning_rate": 9.856335978141703e-06, - "loss": 0.7605, + "epoch": 0.07, + "grad_norm": 2.1391578458822056, + "learning_rate": 9.957455240113365e-06, + "loss": 0.5857, "step": 991 }, { - "epoch": 0.1, - "grad_norm": 3.229378466858724, - "learning_rate": 9.85593010156349e-06, - "loss": 0.7731, + "epoch": 0.07, + "grad_norm": 1.8038855192382879, + "learning_rate": 9.957305516497712e-06, + "loss": 0.5782, "step": 992 }, { - "epoch": 0.1, - "grad_norm": 2.985236389182805, - "learning_rate": 9.855523660837355e-06, - "loss": 0.7433, + "epoch": 0.07, + "grad_norm": 2.309851224451441, + "learning_rate": 9.957155531020385e-06, + "loss": 0.6199, "step": 993 }, { - "epoch": 0.1, - "grad_norm": 2.418368994051385, - "learning_rate": 9.855116656010518e-06, - "loss": 0.6799, + "epoch": 0.07, + "grad_norm": 1.8265647357635815, + "learning_rate": 9.957005283689303e-06, + "loss": 0.5406, "step": 994 }, { - "epoch": 0.1, - "grad_norm": 2.6770469216630866, - "learning_rate": 9.854709087130261e-06, - "loss": 0.6967, + "epoch": 0.07, + "grad_norm": 1.9328246685906743, + "learning_rate": 9.956854774512407e-06, + "loss": 0.5896, "step": 995 }, { - "epoch": 0.1, - "grad_norm": 1.2596414821764004, - "learning_rate": 9.854300954243937e-06, - "loss": 0.6321, + "epoch": 0.07, + "grad_norm": 2.1314023843884473, + "learning_rate": 9.956704003497646e-06, + "loss": 0.6049, "step": 996 }, { - "epoch": 0.1, - "grad_norm": 1.1913642125753563, - "learning_rate": 9.853892257398961e-06, - "loss": 0.6239, + "epoch": 0.07, + "grad_norm": 1.9500532067357477, + "learning_rate": 9.956552970652984e-06, + "loss": 0.6205, "step": 997 }, { - "epoch": 0.11, - "grad_norm": 5.038065448125383, - "learning_rate": 9.853482996642812e-06, - "loss": 0.8232, + "epoch": 0.07, + "grad_norm": 1.7889198911358128, + "learning_rate": 9.956401675986398e-06, + "loss": 0.5817, "step": 998 }, { - "epoch": 0.11, - "grad_norm": 2.80199548153629, - "learning_rate": 9.85307317202304e-06, - "loss": 0.7232, + "epoch": 0.07, + "grad_norm": 2.177320840210121, + "learning_rate": 9.956250119505881e-06, + "loss": 0.6471, "step": 999 }, { - "epoch": 0.11, - "grad_norm": 3.3246781936353464, - "learning_rate": 9.852662783587255e-06, - "loss": 0.6723, + "epoch": 0.07, + "grad_norm": 2.085154339503883, + "learning_rate": 9.956098301219438e-06, + "loss": 0.5975, "step": 1000 }, { - "epoch": 0.11, - "grad_norm": 2.285393284696264, - "learning_rate": 9.852251831383136e-06, - "loss": 0.7249, + "epoch": 0.07, + "grad_norm": 1.905296200571165, + "learning_rate": 9.955946221135091e-06, + "loss": 0.5691, "step": 1001 }, { - "epoch": 0.11, - "grad_norm": 2.6840301004398026, - "learning_rate": 9.851840315458424e-06, - "loss": 0.8205, + "epoch": 0.07, + "grad_norm": 3.0593920515931994, + "learning_rate": 9.955793879260872e-06, + "loss": 0.6006, "step": 1002 }, { - "epoch": 0.11, - "grad_norm": 2.635419321078597, - "learning_rate": 9.85142823586093e-06, - "loss": 0.7197, + "epoch": 0.07, + "grad_norm": 0.9195687243340704, + "learning_rate": 9.955641275604825e-06, + "loss": 0.4678, "step": 1003 }, { - "epoch": 0.11, - "grad_norm": 3.832069075664214, - "learning_rate": 9.851015592638528e-06, - "loss": 0.7241, + "epoch": 0.07, + "grad_norm": 2.264031790407994, + "learning_rate": 9.955488410175016e-06, + "loss": 0.5508, "step": 1004 }, { - "epoch": 0.11, - "grad_norm": 2.533811572972949, - "learning_rate": 9.850602385839158e-06, - "loss": 0.7935, + "epoch": 0.07, + "grad_norm": 1.621076090187733, + "learning_rate": 9.955335282979517e-06, + "loss": 0.5568, "step": 1005 }, { - "epoch": 0.11, - "grad_norm": 3.225027430000192, - "learning_rate": 9.850188615510824e-06, - "loss": 0.6579, + "epoch": 0.07, + "grad_norm": 0.9371986024203656, + "learning_rate": 9.955181894026415e-06, + "loss": 0.4701, "step": 1006 }, { - "epoch": 0.11, - "grad_norm": 2.3382405131753483, - "learning_rate": 9.849774281701597e-06, - "loss": 0.7659, + "epoch": 0.07, + "grad_norm": 1.6427943191551786, + "learning_rate": 9.95502824332382e-06, + "loss": 0.5871, "step": 1007 }, { - "epoch": 0.11, - "grad_norm": 2.097379634081843, - "learning_rate": 9.849359384459614e-06, - "loss": 0.7244, + "epoch": 0.07, + "grad_norm": 1.7513905841246618, + "learning_rate": 9.954874330879839e-06, + "loss": 0.5945, "step": 1008 }, { - "epoch": 0.11, - "grad_norm": 2.882248967395376, - "learning_rate": 9.848943923833075e-06, - "loss": 0.699, + "epoch": 0.07, + "grad_norm": 1.7599051316341732, + "learning_rate": 9.95472015670261e-06, + "loss": 0.5607, "step": 1009 }, { - "epoch": 0.11, - "grad_norm": 3.1835190905227715, - "learning_rate": 9.848527899870249e-06, - "loss": 0.729, + "epoch": 0.07, + "grad_norm": 1.634660555100661, + "learning_rate": 9.95456572080027e-06, + "loss": 0.518, "step": 1010 }, { - "epoch": 0.11, - "grad_norm": 3.3344865062688966, - "learning_rate": 9.848111312619464e-06, - "loss": 0.7155, + "epoch": 0.07, + "grad_norm": 1.7147215798274211, + "learning_rate": 9.95441102318098e-06, + "loss": 0.5868, "step": 1011 }, { - "epoch": 0.11, - "grad_norm": 2.5563191668434757, - "learning_rate": 9.847694162129124e-06, - "loss": 0.718, + "epoch": 0.07, + "grad_norm": 2.819909120965664, + "learning_rate": 9.954256063852913e-06, + "loss": 0.6384, "step": 1012 }, { - "epoch": 0.11, - "grad_norm": 2.468563374343176, - "learning_rate": 9.84727644844769e-06, - "loss": 0.7124, + "epoch": 0.07, + "grad_norm": 1.8027608347820943, + "learning_rate": 9.954100842824256e-06, + "loss": 0.6005, "step": 1013 }, { - "epoch": 0.11, - "grad_norm": 2.7246642917209405, - "learning_rate": 9.846858171623687e-06, - "loss": 0.6024, + "epoch": 0.07, + "grad_norm": 1.7472294862032016, + "learning_rate": 9.953945360103202e-06, + "loss": 0.5864, "step": 1014 }, { - "epoch": 0.11, - "grad_norm": 2.0551445765951826, - "learning_rate": 9.846439331705715e-06, - "loss": 0.6882, + "epoch": 0.07, + "grad_norm": 2.4085909224303546, + "learning_rate": 9.95378961569797e-06, + "loss": 0.5914, "step": 1015 }, { - "epoch": 0.11, - "grad_norm": 2.9450655022782932, - "learning_rate": 9.846019928742432e-06, - "loss": 0.7355, + "epoch": 0.07, + "grad_norm": 1.8637468950007319, + "learning_rate": 9.953633609616786e-06, + "loss": 0.6092, "step": 1016 }, { - "epoch": 0.11, - "grad_norm": 2.1869980803278106, - "learning_rate": 9.84559996278256e-06, - "loss": 0.6998, + "epoch": 0.07, + "grad_norm": 1.0136157150020422, + "learning_rate": 9.953477341867887e-06, + "loss": 0.4541, "step": 1017 }, { - "epoch": 0.11, - "grad_norm": 2.0957427212075115, - "learning_rate": 9.845179433874891e-06, - "loss": 0.7364, + "epoch": 0.07, + "grad_norm": 2.0554482270108427, + "learning_rate": 9.95332081245953e-06, + "loss": 0.5952, "step": 1018 }, { - "epoch": 0.11, - "grad_norm": 2.2104276939981258, - "learning_rate": 9.844758342068284e-06, - "loss": 0.7528, + "epoch": 0.07, + "grad_norm": 0.8286010717785349, + "learning_rate": 9.953164021399986e-06, + "loss": 0.4569, "step": 1019 }, { - "epoch": 0.11, - "grad_norm": 2.2088600503635325, - "learning_rate": 9.844336687411657e-06, - "loss": 0.637, + "epoch": 0.07, + "grad_norm": 1.7710412966403686, + "learning_rate": 9.953006968697532e-06, + "loss": 0.5974, "step": 1020 }, { - "epoch": 0.11, - "grad_norm": 2.2733449333519444, - "learning_rate": 9.843914469953995e-06, - "loss": 0.7377, + "epoch": 0.07, + "grad_norm": 2.3860842730220106, + "learning_rate": 9.952849654360468e-06, + "loss": 0.557, "step": 1021 }, { - "epoch": 0.11, - "grad_norm": 6.872012422821911, - "learning_rate": 9.843491689744354e-06, - "loss": 0.7699, + "epoch": 0.07, + "grad_norm": 2.12677653936814, + "learning_rate": 9.952692078397104e-06, + "loss": 0.5814, "step": 1022 }, { - "epoch": 0.11, - "grad_norm": 2.471746142903541, - "learning_rate": 9.84306834683185e-06, - "loss": 0.7256, + "epoch": 0.07, + "grad_norm": 1.9015927872337524, + "learning_rate": 9.952534240815761e-06, + "loss": 0.5362, "step": 1023 }, { - "epoch": 0.11, - "grad_norm": 2.5086959706339997, - "learning_rate": 9.842644441265664e-06, - "loss": 0.8149, + "epoch": 0.07, + "grad_norm": 2.436318903492822, + "learning_rate": 9.952376141624777e-06, + "loss": 0.6558, "step": 1024 }, { - "epoch": 0.11, - "grad_norm": 2.3150840456614925, - "learning_rate": 9.842219973095045e-06, - "loss": 0.7124, + "epoch": 0.07, + "grad_norm": 1.746096871488504, + "learning_rate": 9.952217780832505e-06, + "loss": 0.6069, "step": 1025 }, { - "epoch": 0.11, - "grad_norm": 2.3604359593997164, - "learning_rate": 9.841794942369309e-06, - "loss": 0.6731, + "epoch": 0.07, + "grad_norm": 2.6366731461385875, + "learning_rate": 9.952059158447312e-06, + "loss": 0.5427, "step": 1026 }, { - "epoch": 0.11, - "grad_norm": 2.439817785760237, - "learning_rate": 9.841369349137832e-06, - "loss": 0.7622, + "epoch": 0.07, + "grad_norm": 5.83223506274411, + "learning_rate": 9.951900274477571e-06, + "loss": 0.6497, "step": 1027 }, { - "epoch": 0.11, - "grad_norm": 2.9509635034152497, - "learning_rate": 9.840943193450059e-06, - "loss": 0.7069, + "epoch": 0.07, + "grad_norm": 4.046706841433947, + "learning_rate": 9.95174112893168e-06, + "loss": 0.6756, "step": 1028 }, { - "epoch": 0.11, - "grad_norm": 2.9934027081797234, - "learning_rate": 9.840516475355499e-06, - "loss": 0.7784, + "epoch": 0.07, + "grad_norm": 2.407595421247788, + "learning_rate": 9.951581721818041e-06, + "loss": 0.5719, "step": 1029 }, { - "epoch": 0.11, - "grad_norm": 2.533628591878186, - "learning_rate": 9.840089194903729e-06, - "loss": 0.7079, + "epoch": 0.07, + "grad_norm": 1.909742886923054, + "learning_rate": 9.951422053145081e-06, + "loss": 0.632, "step": 1030 }, { - "epoch": 0.11, - "grad_norm": 2.4056446800859272, - "learning_rate": 9.839661352144386e-06, - "loss": 0.761, + "epoch": 0.07, + "grad_norm": 1.2548387890304005, + "learning_rate": 9.951262122921228e-06, + "loss": 0.4737, "step": 1031 }, { - "epoch": 0.11, - "grad_norm": 2.515210654037056, - "learning_rate": 9.839232947127178e-06, - "loss": 0.7748, + "epoch": 0.07, + "grad_norm": 1.7144660024606917, + "learning_rate": 9.951101931154933e-06, + "loss": 0.5959, "step": 1032 }, { - "epoch": 0.11, - "grad_norm": 2.108290692947581, - "learning_rate": 9.838803979901874e-06, - "loss": 0.7569, + "epoch": 0.07, + "grad_norm": 1.192050099722398, + "learning_rate": 9.950941477854659e-06, + "loss": 0.4719, "step": 1033 }, { - "epoch": 0.11, - "grad_norm": 3.079443665645577, - "learning_rate": 9.838374450518311e-06, - "loss": 0.7073, + "epoch": 0.07, + "grad_norm": 2.0154896917261853, + "learning_rate": 9.950780763028878e-06, + "loss": 0.5896, "step": 1034 }, { - "epoch": 0.11, - "grad_norm": 2.3720082565519367, - "learning_rate": 9.837944359026392e-06, - "loss": 0.735, + "epoch": 0.07, + "grad_norm": 0.9253854303917076, + "learning_rate": 9.950619786686083e-06, + "loss": 0.4625, "step": 1035 }, { - "epoch": 0.11, - "grad_norm": 2.6597037634218994, - "learning_rate": 9.837513705476082e-06, - "loss": 0.745, + "epoch": 0.07, + "grad_norm": 1.7669939912916404, + "learning_rate": 9.950458548834775e-06, + "loss": 0.6043, "step": 1036 }, { - "epoch": 0.11, - "grad_norm": 3.6631008749810534, - "learning_rate": 9.837082489917413e-06, - "loss": 0.7522, + "epoch": 0.07, + "grad_norm": 2.106498345073158, + "learning_rate": 9.950297049483472e-06, + "loss": 0.5116, "step": 1037 }, { - "epoch": 0.11, - "grad_norm": 2.7564725036319344, - "learning_rate": 9.836650712400484e-06, - "loss": 0.6971, + "epoch": 0.07, + "grad_norm": 1.559036732778817, + "learning_rate": 9.950135288640706e-06, + "loss": 0.5963, "step": 1038 }, { - "epoch": 0.11, - "grad_norm": 2.7150369300373387, - "learning_rate": 9.836218372975456e-06, - "loss": 0.7348, + "epoch": 0.07, + "grad_norm": 1.8934992065845806, + "learning_rate": 9.949973266315018e-06, + "loss": 0.6238, "step": 1039 }, { - "epoch": 0.11, - "grad_norm": 2.456649521111631, - "learning_rate": 9.835785471692559e-06, - "loss": 0.7276, + "epoch": 0.07, + "grad_norm": 1.7070859822507563, + "learning_rate": 9.949810982514971e-06, + "loss": 0.6488, "step": 1040 }, { - "epoch": 0.11, - "grad_norm": 2.4789195883853457, - "learning_rate": 9.835352008602081e-06, - "loss": 0.7406, + "epoch": 0.07, + "grad_norm": 1.9164769488059537, + "learning_rate": 9.949648437249138e-06, + "loss": 0.5585, "step": 1041 }, { - "epoch": 0.11, - "grad_norm": 2.240016595350792, - "learning_rate": 9.834917983754388e-06, - "loss": 0.7213, + "epoch": 0.07, + "grad_norm": 2.215174184219762, + "learning_rate": 9.949485630526099e-06, + "loss": 0.6298, "step": 1042 }, { - "epoch": 0.11, - "grad_norm": 2.4292109109445925, - "learning_rate": 9.834483397199897e-06, - "loss": 0.7573, + "epoch": 0.07, + "grad_norm": 1.715485794951897, + "learning_rate": 9.94932256235446e-06, + "loss": 0.6212, "step": 1043 }, { - "epoch": 0.11, - "grad_norm": 2.7009207551734242, - "learning_rate": 9.834048248989101e-06, - "loss": 0.7654, + "epoch": 0.07, + "grad_norm": 2.1331970908573354, + "learning_rate": 9.949159232742832e-06, + "loss": 0.526, "step": 1044 }, { - "epoch": 0.11, - "grad_norm": 3.359205277225582, - "learning_rate": 9.833612539172554e-06, - "loss": 0.6987, + "epoch": 0.07, + "grad_norm": 2.106084618614398, + "learning_rate": 9.948995641699844e-06, + "loss": 0.7176, "step": 1045 }, { - "epoch": 0.11, - "grad_norm": 4.052083146121145, - "learning_rate": 9.833176267800874e-06, - "loss": 0.8056, + "epoch": 0.07, + "grad_norm": 1.7439403995469547, + "learning_rate": 9.948831789234135e-06, + "loss": 0.566, "step": 1046 }, { - "epoch": 0.11, - "grad_norm": 2.7708114222605413, - "learning_rate": 9.832739434924747e-06, - "loss": 0.7814, + "epoch": 0.07, + "grad_norm": 2.0162970016182307, + "learning_rate": 9.948667675354365e-06, + "loss": 0.5867, "step": 1047 }, { - "epoch": 0.11, - "grad_norm": 2.4023504113769785, - "learning_rate": 9.832302040594923e-06, - "loss": 0.7072, + "epoch": 0.07, + "grad_norm": 1.1698415200384558, + "learning_rate": 9.948503300069197e-06, + "loss": 0.4817, "step": 1048 }, { - "epoch": 0.11, - "grad_norm": 2.926390430878239, - "learning_rate": 9.831864084862216e-06, - "loss": 0.7338, + "epoch": 0.07, + "grad_norm": 1.9293637615841233, + "learning_rate": 9.948338663387319e-06, + "loss": 0.6291, "step": 1049 }, { - "epoch": 0.11, - "grad_norm": 2.18607735411363, - "learning_rate": 9.831425567777506e-06, - "loss": 0.7444, + "epoch": 0.07, + "grad_norm": 1.741904619623014, + "learning_rate": 9.948173765317424e-06, + "loss": 0.6026, "step": 1050 }, { - "epoch": 0.11, - "grad_norm": 2.494103967751944, - "learning_rate": 9.830986489391743e-06, - "loss": 0.7925, + "epoch": 0.07, + "grad_norm": 1.9159397491325307, + "learning_rate": 9.948008605868224e-06, + "loss": 0.5795, "step": 1051 }, { - "epoch": 0.11, - "grad_norm": 2.65375185592133, - "learning_rate": 9.830546849755932e-06, - "loss": 0.7176, + "epoch": 0.07, + "grad_norm": 1.4563528745157615, + "learning_rate": 9.947843185048443e-06, + "loss": 0.4874, "step": 1052 }, { - "epoch": 0.11, - "grad_norm": 2.350034885411002, - "learning_rate": 9.830106648921152e-06, - "loss": 0.6827, + "epoch": 0.07, + "grad_norm": 2.6336049463940436, + "learning_rate": 9.94767750286682e-06, + "loss": 0.5718, "step": 1053 }, { - "epoch": 0.11, - "grad_norm": 3.2408010200349135, - "learning_rate": 9.829665886938544e-06, - "loss": 0.7066, + "epoch": 0.07, + "grad_norm": 1.9704737575128997, + "learning_rate": 9.947511559332107e-06, + "loss": 0.6471, "step": 1054 }, { - "epoch": 0.11, - "grad_norm": 3.0868922059592543, - "learning_rate": 9.829224563859314e-06, - "loss": 0.6116, + "epoch": 0.07, + "grad_norm": 1.6352919901322311, + "learning_rate": 9.947345354453068e-06, + "loss": 0.6399, "step": 1055 }, { - "epoch": 0.11, - "grad_norm": 2.941803666500437, - "learning_rate": 9.828782679734737e-06, - "loss": 0.8022, + "epoch": 0.07, + "grad_norm": 1.6996076903204365, + "learning_rate": 9.947178888238485e-06, + "loss": 0.6229, "step": 1056 }, { - "epoch": 0.11, - "grad_norm": 3.823063887767071, - "learning_rate": 9.828340234616142e-06, - "loss": 0.7427, + "epoch": 0.08, + "grad_norm": 1.6331791209378932, + "learning_rate": 9.947012160697149e-06, + "loss": 0.5935, "step": 1057 }, { - "epoch": 0.11, - "grad_norm": 2.8844101888229514, - "learning_rate": 9.827897228554939e-06, - "loss": 0.7707, + "epoch": 0.08, + "grad_norm": 2.0980869735292678, + "learning_rate": 9.946845171837866e-06, + "loss": 0.5947, "step": 1058 }, { - "epoch": 0.11, - "grad_norm": 3.0160506838501684, - "learning_rate": 9.827453661602592e-06, - "loss": 0.7326, + "epoch": 0.08, + "grad_norm": 1.693997810089867, + "learning_rate": 9.946677921669462e-06, + "loss": 0.5849, "step": 1059 }, { - "epoch": 0.11, - "grad_norm": 2.300778641620372, - "learning_rate": 9.827009533810632e-06, - "loss": 0.7175, + "epoch": 0.08, + "grad_norm": 1.802331373003337, + "learning_rate": 9.946510410200768e-06, + "loss": 0.6111, "step": 1060 }, { - "epoch": 0.11, - "grad_norm": 2.6516868846525616, - "learning_rate": 9.82656484523066e-06, - "loss": 0.7898, + "epoch": 0.08, + "grad_norm": 0.9928957996677363, + "learning_rate": 9.94634263744063e-06, + "loss": 0.4903, "step": 1061 }, { - "epoch": 0.11, - "grad_norm": 2.4715363916836623, - "learning_rate": 9.826119595914334e-06, - "loss": 0.6921, + "epoch": 0.08, + "grad_norm": 1.82141531879438, + "learning_rate": 9.946174603397915e-06, + "loss": 0.622, "step": 1062 }, { - "epoch": 0.11, - "grad_norm": 2.327340583098541, - "learning_rate": 9.825673785913385e-06, - "loss": 0.6928, + "epoch": 0.08, + "grad_norm": 1.7977843041525552, + "learning_rate": 9.9460063080815e-06, + "loss": 0.6069, "step": 1063 }, { - "epoch": 0.11, - "grad_norm": 2.759469881660751, - "learning_rate": 9.825227415279606e-06, - "loss": 0.7855, + "epoch": 0.08, + "grad_norm": 2.0142252920841166, + "learning_rate": 9.945837751500269e-06, + "loss": 0.5752, "step": 1064 }, { - "epoch": 0.11, - "grad_norm": 2.534088445808419, - "learning_rate": 9.824780484064853e-06, - "loss": 0.7307, + "epoch": 0.08, + "grad_norm": 1.6331377771756355, + "learning_rate": 9.945668933663131e-06, + "loss": 0.5668, "step": 1065 }, { - "epoch": 0.11, - "grad_norm": 2.1112574816361924, - "learning_rate": 9.824332992321052e-06, - "loss": 0.6811, + "epoch": 0.08, + "grad_norm": 1.7979694804994537, + "learning_rate": 9.945499854579002e-06, + "loss": 0.5581, "step": 1066 }, { - "epoch": 0.11, - "grad_norm": 2.373627167824405, - "learning_rate": 9.823884940100188e-06, - "loss": 0.7312, + "epoch": 0.08, + "grad_norm": 1.6909355623185462, + "learning_rate": 9.945330514256811e-06, + "loss": 0.5939, "step": 1067 }, { - "epoch": 0.11, - "grad_norm": 2.7883580813619697, - "learning_rate": 9.823436327454318e-06, - "loss": 0.7676, + "epoch": 0.08, + "grad_norm": 2.0428229522539514, + "learning_rate": 9.945160912705507e-06, + "loss": 0.59, "step": 1068 }, { - "epoch": 0.11, - "grad_norm": 2.1899506229168537, - "learning_rate": 9.822987154435557e-06, - "loss": 0.7408, + "epoch": 0.08, + "grad_norm": 0.8589318987832768, + "learning_rate": 9.944991049934047e-06, + "loss": 0.4554, "step": 1069 }, { - "epoch": 0.11, - "grad_norm": 2.0479852936546252, - "learning_rate": 9.82253742109609e-06, - "loss": 0.6622, + "epoch": 0.08, + "grad_norm": 2.1013614058080283, + "learning_rate": 9.944820925951403e-06, + "loss": 0.6036, "step": 1070 }, { - "epoch": 0.11, - "grad_norm": 2.7416313050535015, - "learning_rate": 9.822087127488167e-06, - "loss": 0.7698, + "epoch": 0.08, + "grad_norm": 1.8467886516297793, + "learning_rate": 9.944650540766564e-06, + "loss": 0.5506, "step": 1071 }, { - "epoch": 0.11, - "grad_norm": 2.105517261367347, - "learning_rate": 9.821636273664102e-06, - "loss": 0.6482, + "epoch": 0.08, + "grad_norm": 1.7721712961746898, + "learning_rate": 9.944479894388525e-06, + "loss": 0.6259, "step": 1072 }, { - "epoch": 0.11, - "grad_norm": 3.280144562837707, - "learning_rate": 9.821184859676269e-06, - "loss": 0.6423, + "epoch": 0.08, + "grad_norm": 1.7833551363556575, + "learning_rate": 9.944308986826307e-06, + "loss": 0.5991, "step": 1073 }, { - "epoch": 0.11, - "grad_norm": 4.134409942350642, - "learning_rate": 9.820732885577117e-06, - "loss": 0.6664, + "epoch": 0.08, + "grad_norm": 1.8629899170228683, + "learning_rate": 9.944137818088935e-06, + "loss": 0.6456, "step": 1074 }, { - "epoch": 0.11, - "grad_norm": 2.912802423096457, - "learning_rate": 9.820280351419155e-06, - "loss": 0.826, + "epoch": 0.08, + "grad_norm": 2.1191424301878587, + "learning_rate": 9.943966388185449e-06, + "loss": 0.5331, "step": 1075 }, { - "epoch": 0.11, - "grad_norm": 3.1928081836845386, - "learning_rate": 9.819827257254957e-06, - "loss": 0.7791, + "epoch": 0.08, + "grad_norm": 1.9581236917185463, + "learning_rate": 9.943794697124907e-06, + "loss": 0.5616, "step": 1076 }, { - "epoch": 0.11, - "grad_norm": 2.779451497550286, - "learning_rate": 9.81937360313716e-06, - "loss": 0.8068, + "epoch": 0.08, + "grad_norm": 1.9378629380033925, + "learning_rate": 9.943622744916377e-06, + "loss": 0.5834, "step": 1077 }, { - "epoch": 0.11, - "grad_norm": 3.32410870620609, - "learning_rate": 9.818919389118466e-06, - "loss": 0.7509, + "epoch": 0.08, + "grad_norm": 1.8417425553959619, + "learning_rate": 9.943450531568942e-06, + "loss": 0.6063, "step": 1078 }, { - "epoch": 0.11, - "grad_norm": 2.5323788910344027, - "learning_rate": 9.81846461525165e-06, - "loss": 0.7576, + "epoch": 0.08, + "grad_norm": 3.3334523100909927, + "learning_rate": 9.9432780570917e-06, + "loss": 0.6144, "step": 1079 }, { - "epoch": 0.11, - "grad_norm": 2.439458544255558, - "learning_rate": 9.818009281589545e-06, - "loss": 0.7202, + "epoch": 0.08, + "grad_norm": 1.954724376318318, + "learning_rate": 9.943105321493757e-06, + "loss": 0.6136, "step": 1080 }, { - "epoch": 0.11, - "grad_norm": 3.0195258140646195, - "learning_rate": 9.817553388185046e-06, - "loss": 0.6668, + "epoch": 0.08, + "grad_norm": 1.6835267446353195, + "learning_rate": 9.942932324784245e-06, + "loss": 0.6153, "step": 1081 }, { - "epoch": 0.11, - "grad_norm": 1.5034092301568605, - "learning_rate": 9.817096935091123e-06, - "loss": 0.6359, + "epoch": 0.08, + "grad_norm": 1.7712250919107193, + "learning_rate": 9.942759066972298e-06, + "loss": 0.5426, "step": 1082 }, { - "epoch": 0.11, - "grad_norm": 3.4584143921665347, - "learning_rate": 9.816639922360802e-06, - "loss": 0.8676, + "epoch": 0.08, + "grad_norm": 2.4004138739848844, + "learning_rate": 9.942585548067069e-06, + "loss": 0.6353, "step": 1083 }, { - "epoch": 0.11, - "grad_norm": 3.1995319474627197, - "learning_rate": 9.816182350047179e-06, - "loss": 0.6817, + "epoch": 0.08, + "grad_norm": 1.7704680960902197, + "learning_rate": 9.942411768077725e-06, + "loss": 0.5702, "step": 1084 }, { - "epoch": 0.11, - "grad_norm": 3.416376258425686, - "learning_rate": 9.815724218203411e-06, - "loss": 0.7575, + "epoch": 0.08, + "grad_norm": 2.7530620945215216, + "learning_rate": 9.942237727013441e-06, + "loss": 0.5896, "step": 1085 }, { - "epoch": 0.11, - "grad_norm": 3.4149722424830817, - "learning_rate": 9.815265526882726e-06, - "loss": 0.7805, + "epoch": 0.08, + "grad_norm": 1.7604046709983363, + "learning_rate": 9.942063424883415e-06, + "loss": 0.5988, "step": 1086 }, { - "epoch": 0.11, - "grad_norm": 2.3282208941621216, - "learning_rate": 9.814806276138412e-06, - "loss": 0.8052, + "epoch": 0.08, + "grad_norm": 2.0994886305309226, + "learning_rate": 9.941888861696854e-06, + "loss": 0.5625, "step": 1087 }, { - "epoch": 0.11, - "grad_norm": 2.4514470562750907, - "learning_rate": 9.81434646602382e-06, - "loss": 0.7267, + "epoch": 0.08, + "grad_norm": 2.0815867359440117, + "learning_rate": 9.941714037462978e-06, + "loss": 0.5451, "step": 1088 }, { - "epoch": 0.11, - "grad_norm": 2.976660270631451, - "learning_rate": 9.813886096592376e-06, - "loss": 0.6789, + "epoch": 0.08, + "grad_norm": 6.017174916020509, + "learning_rate": 9.94153895219102e-06, + "loss": 0.5847, "step": 1089 }, { - "epoch": 0.11, - "grad_norm": 2.0977217217797643, - "learning_rate": 9.81342516789756e-06, - "loss": 0.7351, + "epoch": 0.08, + "grad_norm": 1.7964656136664965, + "learning_rate": 9.941363605890231e-06, + "loss": 0.5787, "step": 1090 }, { - "epoch": 0.11, - "grad_norm": 16.753908355958515, - "learning_rate": 9.81296367999292e-06, - "loss": 0.6394, + "epoch": 0.08, + "grad_norm": 1.8878433405503776, + "learning_rate": 9.941187998569874e-06, + "loss": 0.6249, "step": 1091 }, { - "epoch": 0.11, - "grad_norm": 2.3815484353523138, - "learning_rate": 9.812501632932074e-06, - "loss": 0.7619, + "epoch": 0.08, + "grad_norm": 1.7354164545495139, + "learning_rate": 9.941012130239223e-06, + "loss": 0.6023, "step": 1092 }, { - "epoch": 0.12, - "grad_norm": 4.14830234781379, - "learning_rate": 9.8120390267687e-06, - "loss": 0.7141, + "epoch": 0.08, + "grad_norm": 1.7835679311166261, + "learning_rate": 9.940836000907567e-06, + "loss": 0.521, "step": 1093 }, { - "epoch": 0.12, - "grad_norm": 2.4341062988650624, - "learning_rate": 9.811575861556541e-06, - "loss": 0.776, + "epoch": 0.08, + "grad_norm": 1.6435070523996826, + "learning_rate": 9.940659610584214e-06, + "loss": 0.6245, "step": 1094 }, { - "epoch": 0.12, - "grad_norm": 2.290585984126302, - "learning_rate": 9.811112137349407e-06, - "loss": 0.8117, + "epoch": 0.08, + "grad_norm": 1.7810198784808215, + "learning_rate": 9.940482959278479e-06, + "loss": 0.534, "step": 1095 }, { - "epoch": 0.12, - "grad_norm": 2.555561103046378, - "learning_rate": 9.810647854201174e-06, - "loss": 0.748, + "epoch": 0.08, + "grad_norm": 1.717099991623071, + "learning_rate": 9.940306046999693e-06, + "loss": 0.5665, "step": 1096 }, { - "epoch": 0.12, - "grad_norm": 2.710466022236052, - "learning_rate": 9.81018301216578e-06, - "loss": 0.7782, + "epoch": 0.08, + "grad_norm": 1.5353589015000866, + "learning_rate": 9.940128873757199e-06, + "loss": 0.5935, "step": 1097 }, { - "epoch": 0.12, - "grad_norm": 3.3963252605264445, - "learning_rate": 9.809717611297227e-06, - "loss": 0.6976, + "epoch": 0.08, + "grad_norm": 2.360983253856564, + "learning_rate": 9.939951439560363e-06, + "loss": 0.6579, "step": 1098 }, { - "epoch": 0.12, - "grad_norm": 2.6354731547094254, - "learning_rate": 9.809251651649586e-06, - "loss": 0.8057, + "epoch": 0.08, + "grad_norm": 1.7229907418353716, + "learning_rate": 9.93977374441855e-06, + "loss": 0.6084, "step": 1099 }, { - "epoch": 0.12, - "grad_norm": 1.4944044637050682, - "learning_rate": 9.80878513327699e-06, - "loss": 0.6846, + "epoch": 0.08, + "grad_norm": 1.8055796260521704, + "learning_rate": 9.93959578834115e-06, + "loss": 0.5497, "step": 1100 }, { - "epoch": 0.12, - "grad_norm": 2.2779646813088203, - "learning_rate": 9.80831805623364e-06, - "loss": 0.6475, + "epoch": 0.08, + "grad_norm": 2.7722259243748724, + "learning_rate": 9.939417571337563e-06, + "loss": 0.5765, "step": 1101 }, { - "epoch": 0.12, - "grad_norm": 2.3609991882243317, - "learning_rate": 9.807850420573794e-06, - "loss": 0.6642, + "epoch": 0.08, + "grad_norm": 2.209330304793147, + "learning_rate": 9.939239093417205e-06, + "loss": 0.613, "step": 1102 }, { - "epoch": 0.12, - "grad_norm": 2.2887645711932185, - "learning_rate": 9.807382226351786e-06, - "loss": 0.7091, + "epoch": 0.08, + "grad_norm": 0.9045863058371468, + "learning_rate": 9.9390603545895e-06, + "loss": 0.4747, "step": 1103 }, { - "epoch": 0.12, - "grad_norm": 2.1955085410313404, - "learning_rate": 9.806913473622008e-06, - "loss": 0.6662, + "epoch": 0.08, + "grad_norm": 1.7971246380651944, + "learning_rate": 9.938881354863891e-06, + "loss": 0.5644, "step": 1104 }, { - "epoch": 0.12, - "grad_norm": 1.2036596843300829, - "learning_rate": 9.806444162438917e-06, - "loss": 0.6465, + "epoch": 0.08, + "grad_norm": 1.7226699472964218, + "learning_rate": 9.938702094249833e-06, + "loss": 0.5734, "step": 1105 }, { - "epoch": 0.12, - "grad_norm": 1.1349422385810124, - "learning_rate": 9.805974292857038e-06, - "loss": 0.6237, + "epoch": 0.08, + "grad_norm": 1.5528802933881127, + "learning_rate": 9.938522572756798e-06, + "loss": 0.5556, "step": 1106 }, { - "epoch": 0.12, - "grad_norm": 2.5361645121575336, - "learning_rate": 9.805503864930958e-06, - "loss": 0.9142, + "epoch": 0.08, + "grad_norm": 1.738750246448217, + "learning_rate": 9.938342790394264e-06, + "loss": 0.6161, "step": 1107 }, { - "epoch": 0.12, - "grad_norm": 3.0395622006277123, - "learning_rate": 9.80503287871533e-06, - "loss": 0.6835, + "epoch": 0.08, + "grad_norm": 2.436858321203428, + "learning_rate": 9.93816274717173e-06, + "loss": 0.5683, "step": 1108 }, { - "epoch": 0.12, - "grad_norm": 2.422353959280125, - "learning_rate": 9.804561334264872e-06, - "loss": 0.7633, + "epoch": 0.08, + "grad_norm": 1.6188958672116516, + "learning_rate": 9.937982443098711e-06, + "loss": 0.5591, "step": 1109 }, { - "epoch": 0.12, - "grad_norm": 2.5480040358296767, - "learning_rate": 9.804089231634368e-06, - "loss": 0.8203, + "epoch": 0.08, + "grad_norm": 2.068851672463039, + "learning_rate": 9.937801878184725e-06, + "loss": 0.6161, "step": 1110 }, { - "epoch": 0.12, - "grad_norm": 2.8729382273172983, - "learning_rate": 9.803616570878664e-06, - "loss": 0.6498, + "epoch": 0.08, + "grad_norm": 1.5476561188707283, + "learning_rate": 9.93762105243931e-06, + "loss": 0.5375, "step": 1111 }, { - "epoch": 0.12, - "grad_norm": 2.7811166027244028, - "learning_rate": 9.803143352052674e-06, - "loss": 0.5825, + "epoch": 0.08, + "grad_norm": 2.233636640510574, + "learning_rate": 9.937439965872023e-06, + "loss": 0.6375, "step": 1112 }, { - "epoch": 0.12, - "grad_norm": 2.12202620735347, - "learning_rate": 9.802669575211369e-06, - "loss": 0.6428, + "epoch": 0.08, + "grad_norm": 2.2153100538571717, + "learning_rate": 9.937258618492425e-06, + "loss": 0.6251, "step": 1113 }, { - "epoch": 0.12, - "grad_norm": 5.0006468325620315, - "learning_rate": 9.8021952404098e-06, - "loss": 0.7803, + "epoch": 0.08, + "grad_norm": 2.3580833559884424, + "learning_rate": 9.937077010310099e-06, + "loss": 0.6024, "step": 1114 }, { - "epoch": 0.12, - "grad_norm": 3.1558917309253327, - "learning_rate": 9.80172034770307e-06, - "loss": 0.7181, + "epoch": 0.08, + "grad_norm": 1.9381800937110203, + "learning_rate": 9.936895141334634e-06, + "loss": 0.6607, "step": 1115 }, { - "epoch": 0.12, - "grad_norm": 2.333799697036943, - "learning_rate": 9.801244897146348e-06, - "loss": 0.8098, + "epoch": 0.08, + "grad_norm": 1.9912594348204549, + "learning_rate": 9.93671301157564e-06, + "loss": 0.5838, "step": 1116 }, { - "epoch": 0.12, - "grad_norm": 2.3764518557927974, - "learning_rate": 9.800768888794874e-06, - "loss": 0.6982, + "epoch": 0.08, + "grad_norm": 1.65084905388092, + "learning_rate": 9.936530621042737e-06, + "loss": 0.6092, "step": 1117 }, { - "epoch": 0.12, - "grad_norm": 2.553267042004277, - "learning_rate": 9.800292322703949e-06, - "loss": 0.7525, + "epoch": 0.08, + "grad_norm": 1.683911459481241, + "learning_rate": 9.93634796974556e-06, + "loss": 0.5961, "step": 1118 }, { - "epoch": 0.12, - "grad_norm": 2.418379261387043, - "learning_rate": 9.799815198928937e-06, - "loss": 0.7675, + "epoch": 0.08, + "grad_norm": 1.9346497166748984, + "learning_rate": 9.936165057693756e-06, + "loss": 0.5546, "step": 1119 }, { - "epoch": 0.12, - "grad_norm": 2.8872564473960765, - "learning_rate": 9.79933751752527e-06, - "loss": 0.6478, + "epoch": 0.08, + "grad_norm": 1.846504101914356, + "learning_rate": 9.935981884896988e-06, + "loss": 0.6791, "step": 1120 }, { - "epoch": 0.12, - "grad_norm": 4.052357385896143, - "learning_rate": 9.798859278548443e-06, - "loss": 0.7681, + "epoch": 0.08, + "grad_norm": 1.7724285502998458, + "learning_rate": 9.935798451364932e-06, + "loss": 0.4809, "step": 1121 }, { - "epoch": 0.12, - "grad_norm": 2.2984285503416286, - "learning_rate": 9.798380482054019e-06, - "loss": 0.6921, + "epoch": 0.08, + "grad_norm": 1.8488035885343617, + "learning_rate": 9.935614757107274e-06, + "loss": 0.5465, "step": 1122 }, { - "epoch": 0.12, - "grad_norm": 2.422351898914631, - "learning_rate": 9.79790112809762e-06, - "loss": 0.7374, + "epoch": 0.08, + "grad_norm": 2.872094531405468, + "learning_rate": 9.935430802133723e-06, + "loss": 0.5425, "step": 1123 }, { - "epoch": 0.12, - "grad_norm": 2.2410791102035756, - "learning_rate": 9.797421216734938e-06, - "loss": 0.664, + "epoch": 0.08, + "grad_norm": 1.6113517570992404, + "learning_rate": 9.935246586453993e-06, + "loss": 0.5818, "step": 1124 }, { - "epoch": 0.12, - "grad_norm": 3.470129472972923, - "learning_rate": 9.796940748021727e-06, - "loss": 0.7462, + "epoch": 0.08, + "grad_norm": 2.22660263791639, + "learning_rate": 9.935062110077815e-06, + "loss": 0.5327, "step": 1125 }, { - "epoch": 0.12, - "grad_norm": 2.414314965943791, - "learning_rate": 9.796459722013804e-06, - "loss": 0.7995, + "epoch": 0.08, + "grad_norm": 2.1927501725989402, + "learning_rate": 9.934877373014935e-06, + "loss": 0.599, "step": 1126 }, { - "epoch": 0.12, - "grad_norm": 3.270258241210417, - "learning_rate": 9.795978138767059e-06, - "loss": 0.7509, + "epoch": 0.08, + "grad_norm": 2.0535723529000967, + "learning_rate": 9.93469237527511e-06, + "loss": 0.5966, "step": 1127 }, { - "epoch": 0.12, - "grad_norm": 2.353498649269498, - "learning_rate": 9.795495998337436e-06, - "loss": 0.8035, + "epoch": 0.08, + "grad_norm": 0.9977837344199024, + "learning_rate": 9.934507116868113e-06, + "loss": 0.4613, "step": 1128 }, { - "epoch": 0.12, - "grad_norm": 2.8698790645980807, - "learning_rate": 9.795013300780951e-06, - "loss": 0.7457, + "epoch": 0.08, + "grad_norm": 1.5215586819981877, + "learning_rate": 9.93432159780373e-06, + "loss": 0.5781, "step": 1129 }, { - "epoch": 0.12, - "grad_norm": 3.6109946770346673, - "learning_rate": 9.794530046153681e-06, - "loss": 0.7238, + "epoch": 0.08, + "grad_norm": 1.825181799742569, + "learning_rate": 9.934135818091759e-06, + "loss": 0.611, "step": 1130 }, { - "epoch": 0.12, - "grad_norm": 2.488339655347247, - "learning_rate": 9.79404623451177e-06, - "loss": 0.7118, + "epoch": 0.08, + "grad_norm": 1.866276369445615, + "learning_rate": 9.933949777742016e-06, + "loss": 0.5768, "step": 1131 }, { - "epoch": 0.12, - "grad_norm": 2.9913223737937966, - "learning_rate": 9.793561865911425e-06, - "loss": 0.76, + "epoch": 0.08, + "grad_norm": 1.8979232346918364, + "learning_rate": 9.933763476764326e-06, + "loss": 0.5588, "step": 1132 }, { - "epoch": 0.12, - "grad_norm": 5.4767066337380035, - "learning_rate": 9.793076940408921e-06, - "loss": 0.6867, + "epoch": 0.08, + "grad_norm": 4.478385878159797, + "learning_rate": 9.933576915168532e-06, + "loss": 0.5766, "step": 1133 }, { - "epoch": 0.12, - "grad_norm": 2.657626022399911, - "learning_rate": 9.792591458060592e-06, - "loss": 0.7424, + "epoch": 0.08, + "grad_norm": 2.295132543701005, + "learning_rate": 9.933390092964488e-06, + "loss": 0.6108, "step": 1134 }, { - "epoch": 0.12, - "grad_norm": 3.0031975051776887, - "learning_rate": 9.792105418922842e-06, - "loss": 0.6451, + "epoch": 0.08, + "grad_norm": 1.9107164350340187, + "learning_rate": 9.933203010162064e-06, + "loss": 0.573, "step": 1135 }, { - "epoch": 0.12, - "grad_norm": 2.515412387819786, - "learning_rate": 9.791618823052137e-06, - "loss": 0.654, + "epoch": 0.08, + "grad_norm": 1.735183249874809, + "learning_rate": 9.93301566677114e-06, + "loss": 0.584, "step": 1136 }, { - "epoch": 0.12, - "grad_norm": 2.2988508182868506, - "learning_rate": 9.791131670505008e-06, - "loss": 0.7053, + "epoch": 0.08, + "grad_norm": 1.8079158541631022, + "learning_rate": 9.932828062801614e-06, + "loss": 0.5957, "step": 1137 }, { - "epoch": 0.12, - "grad_norm": 3.392212351593364, - "learning_rate": 9.790643961338051e-06, - "loss": 0.6784, + "epoch": 0.08, + "grad_norm": 1.963365070271999, + "learning_rate": 9.932640198263394e-06, + "loss": 0.5687, "step": 1138 }, { - "epoch": 0.12, - "grad_norm": 2.3321628230545013, - "learning_rate": 9.790155695607927e-06, - "loss": 0.747, + "epoch": 0.08, + "grad_norm": 1.8180216504765323, + "learning_rate": 9.932452073166405e-06, + "loss": 0.6131, "step": 1139 }, { - "epoch": 0.12, - "grad_norm": 2.5520902735537194, - "learning_rate": 9.789666873371361e-06, - "loss": 0.7747, + "epoch": 0.08, + "grad_norm": 1.6566641514284106, + "learning_rate": 9.932263687520584e-06, + "loss": 0.5534, "step": 1140 }, { - "epoch": 0.12, - "grad_norm": 3.0561347446951537, - "learning_rate": 9.789177494685146e-06, - "loss": 0.7497, + "epoch": 0.08, + "grad_norm": 2.1486566775981664, + "learning_rate": 9.932075041335883e-06, + "loss": 0.5289, "step": 1141 }, { - "epoch": 0.12, - "grad_norm": 2.42722295869959, - "learning_rate": 9.788687559606131e-06, - "loss": 0.7141, + "epoch": 0.08, + "grad_norm": 0.9987534761509603, + "learning_rate": 9.931886134622264e-06, + "loss": 0.4631, "step": 1142 }, { - "epoch": 0.12, - "grad_norm": 3.087792011146286, - "learning_rate": 9.788197068191237e-06, - "loss": 0.78, + "epoch": 0.08, + "grad_norm": 1.6462619641048146, + "learning_rate": 9.93169696738971e-06, + "loss": 0.5451, "step": 1143 }, { - "epoch": 0.12, - "grad_norm": 2.742833380122242, - "learning_rate": 9.787706020497451e-06, - "loss": 0.702, + "epoch": 0.08, + "grad_norm": 0.9818807156548468, + "learning_rate": 9.931507539648211e-06, + "loss": 0.4503, "step": 1144 }, { - "epoch": 0.12, - "grad_norm": 2.2623934679268323, - "learning_rate": 9.787214416581818e-06, - "loss": 0.6702, + "epoch": 0.08, + "grad_norm": 1.692565068362615, + "learning_rate": 9.931317851407772e-06, + "loss": 0.6765, "step": 1145 }, { - "epoch": 0.12, - "grad_norm": 3.105793673298671, - "learning_rate": 9.786722256501454e-06, - "loss": 0.742, + "epoch": 0.08, + "grad_norm": 2.02382650616117, + "learning_rate": 9.931127902678415e-06, + "loss": 0.6334, "step": 1146 }, { - "epoch": 0.12, - "grad_norm": 2.771703607077318, - "learning_rate": 9.786229540313534e-06, - "loss": 0.6825, + "epoch": 0.08, + "grad_norm": 1.7480501384101033, + "learning_rate": 9.930937693470175e-06, + "loss": 0.5899, "step": 1147 }, { - "epoch": 0.12, - "grad_norm": 2.833220908887274, - "learning_rate": 9.785736268075303e-06, - "loss": 0.6402, + "epoch": 0.08, + "grad_norm": 1.779844091651691, + "learning_rate": 9.930747223793096e-06, + "loss": 0.5488, "step": 1148 }, { - "epoch": 0.12, - "grad_norm": 3.252609192328139, - "learning_rate": 9.785242439844064e-06, - "loss": 0.7542, + "epoch": 0.08, + "grad_norm": 1.88738072373629, + "learning_rate": 9.930556493657242e-06, + "loss": 0.5834, "step": 1149 }, { - "epoch": 0.12, - "grad_norm": 2.6298483732855993, - "learning_rate": 9.784748055677193e-06, - "loss": 0.758, + "epoch": 0.08, + "grad_norm": 1.857050056057343, + "learning_rate": 9.930365503072686e-06, + "loss": 0.5465, "step": 1150 }, { - "epoch": 0.12, - "grad_norm": 2.1632996471894916, - "learning_rate": 9.784253115632125e-06, - "loss": 0.7049, + "epoch": 0.08, + "grad_norm": 2.0678736367773176, + "learning_rate": 9.930174252049518e-06, + "loss": 0.5791, "step": 1151 }, { - "epoch": 0.12, - "grad_norm": 2.34611962892255, - "learning_rate": 9.783757619766359e-06, - "loss": 0.6605, + "epoch": 0.08, + "grad_norm": 2.6019639043718423, + "learning_rate": 9.92998274059784e-06, + "loss": 0.545, "step": 1152 }, { - "epoch": 0.12, - "grad_norm": 2.3000195548875086, - "learning_rate": 9.783261568137461e-06, - "loss": 0.6681, + "epoch": 0.08, + "grad_norm": 2.0098831227896823, + "learning_rate": 9.92979096872777e-06, + "loss": 0.602, "step": 1153 }, { - "epoch": 0.12, - "grad_norm": 2.644856326527659, - "learning_rate": 9.78276496080306e-06, - "loss": 0.7254, + "epoch": 0.08, + "grad_norm": 1.8225948621823336, + "learning_rate": 9.929598936449437e-06, + "loss": 0.5321, "step": 1154 }, { - "epoch": 0.12, - "grad_norm": 3.363577339292242, - "learning_rate": 9.782267797820852e-06, - "loss": 0.6255, + "epoch": 0.08, + "grad_norm": 1.9629747922778744, + "learning_rate": 9.929406643772983e-06, + "loss": 0.5637, "step": 1155 }, { - "epoch": 0.12, - "grad_norm": 2.178294432104343, - "learning_rate": 9.781770079248597e-06, - "loss": 0.6964, + "epoch": 0.08, + "grad_norm": 1.8221778911872186, + "learning_rate": 9.929214090708567e-06, + "loss": 0.5927, "step": 1156 }, { - "epoch": 0.12, - "grad_norm": 2.351307497143435, - "learning_rate": 9.781271805144115e-06, - "loss": 0.672, + "epoch": 0.08, + "grad_norm": 1.891479906155563, + "learning_rate": 9.929021277266359e-06, + "loss": 0.575, "step": 1157 }, { - "epoch": 0.12, - "grad_norm": 2.317111010522036, - "learning_rate": 9.780772975565297e-06, - "loss": 0.716, + "epoch": 0.08, + "grad_norm": 1.7005408430546565, + "learning_rate": 9.928828203456549e-06, + "loss": 0.6286, "step": 1158 }, { - "epoch": 0.12, - "grad_norm": 8.74680266959224, - "learning_rate": 9.780273590570095e-06, - "loss": 0.7632, + "epoch": 0.08, + "grad_norm": 1.767571233276235, + "learning_rate": 9.928634869289329e-06, + "loss": 0.5797, "step": 1159 }, { - "epoch": 0.12, - "grad_norm": 3.8612186188840654, - "learning_rate": 9.779773650216524e-06, - "loss": 0.688, + "epoch": 0.08, + "grad_norm": 1.6585840555713292, + "learning_rate": 9.928441274774917e-06, + "loss": 0.5638, "step": 1160 }, { - "epoch": 0.12, - "grad_norm": 3.73767312113061, - "learning_rate": 9.779273154562668e-06, - "loss": 0.732, + "epoch": 0.08, + "grad_norm": 1.7925367609906053, + "learning_rate": 9.928247419923534e-06, + "loss": 0.6125, "step": 1161 }, { - "epoch": 0.12, - "grad_norm": 2.195963737502883, - "learning_rate": 9.778772103666672e-06, - "loss": 0.7151, + "epoch": 0.08, + "grad_norm": 1.9102301823942354, + "learning_rate": 9.928053304745424e-06, + "loss": 0.5185, "step": 1162 }, { - "epoch": 0.12, - "grad_norm": 2.842525432869639, - "learning_rate": 9.778270497586747e-06, - "loss": 0.6845, + "epoch": 0.08, + "grad_norm": 2.135171401688203, + "learning_rate": 9.927858929250843e-06, + "loss": 0.6252, "step": 1163 }, { - "epoch": 0.12, - "grad_norm": 2.134809149072051, - "learning_rate": 9.77776833638117e-06, - "loss": 0.7597, + "epoch": 0.08, + "grad_norm": 1.759189982336816, + "learning_rate": 9.927664293450053e-06, + "loss": 0.5464, "step": 1164 }, { - "epoch": 0.12, - "grad_norm": 2.7009490835040846, - "learning_rate": 9.777265620108277e-06, - "loss": 0.7779, + "epoch": 0.08, + "grad_norm": 2.7766592990231076, + "learning_rate": 9.927469397353338e-06, + "loss": 0.5508, "step": 1165 }, { - "epoch": 0.12, - "grad_norm": 2.317959306339774, - "learning_rate": 9.776762348826474e-06, - "loss": 0.7794, + "epoch": 0.08, + "grad_norm": 2.217478741465085, + "learning_rate": 9.927274240970992e-06, + "loss": 0.6509, "step": 1166 }, { - "epoch": 0.12, - "grad_norm": 2.109072732996193, - "learning_rate": 9.776258522594231e-06, - "loss": 0.7258, + "epoch": 0.08, + "grad_norm": 1.8780759488027114, + "learning_rate": 9.927078824313325e-06, + "loss": 0.5601, "step": 1167 }, { - "epoch": 0.12, - "grad_norm": 2.1691103374743497, - "learning_rate": 9.775754141470077e-06, - "loss": 0.7235, + "epoch": 0.08, + "grad_norm": 1.623582245937414, + "learning_rate": 9.92688314739066e-06, + "loss": 0.5725, "step": 1168 }, { - "epoch": 0.12, - "grad_norm": 3.2658839932977033, - "learning_rate": 9.775249205512614e-06, - "loss": 0.7238, + "epoch": 0.08, + "grad_norm": 1.7754954655193163, + "learning_rate": 9.926687210213332e-06, + "loss": 0.5901, "step": 1169 }, { - "epoch": 0.12, - "grad_norm": 2.4497606850003515, - "learning_rate": 9.774743714780502e-06, - "loss": 0.7825, + "epoch": 0.08, + "grad_norm": 5.923314392363563, + "learning_rate": 9.926491012791693e-06, + "loss": 0.6128, "step": 1170 }, { - "epoch": 0.12, - "grad_norm": 2.7201508522085223, - "learning_rate": 9.774237669332467e-06, - "loss": 0.7096, + "epoch": 0.08, + "grad_norm": 1.795334476730306, + "learning_rate": 9.926294555136104e-06, + "loss": 0.6193, "step": 1171 }, { - "epoch": 0.12, - "grad_norm": 3.194344873698593, - "learning_rate": 9.7737310692273e-06, - "loss": 0.7806, + "epoch": 0.08, + "grad_norm": 1.6520751082260345, + "learning_rate": 9.926097837256945e-06, + "loss": 0.5444, "step": 1172 }, { - "epoch": 0.12, - "grad_norm": 2.4546802907254675, - "learning_rate": 9.77322391452386e-06, - "loss": 0.7075, + "epoch": 0.08, + "grad_norm": 1.749928391994692, + "learning_rate": 9.925900859164605e-06, + "loss": 0.6031, "step": 1173 }, { - "epoch": 0.12, - "grad_norm": 2.8217799648099753, - "learning_rate": 9.772716205281061e-06, - "loss": 0.6641, + "epoch": 0.08, + "grad_norm": 2.288482939641416, + "learning_rate": 9.925703620869493e-06, + "loss": 0.5724, "step": 1174 }, { - "epoch": 0.12, - "grad_norm": 3.3940574360026314, - "learning_rate": 9.772207941557889e-06, - "loss": 0.7329, + "epoch": 0.08, + "grad_norm": 1.7412790608109916, + "learning_rate": 9.925506122382022e-06, + "loss": 0.574, "step": 1175 }, { - "epoch": 0.12, - "grad_norm": 2.8617824561808356, - "learning_rate": 9.771699123413396e-06, - "loss": 0.6231, + "epoch": 0.08, + "grad_norm": 1.6897988131395134, + "learning_rate": 9.92530836371263e-06, + "loss": 0.5784, "step": 1176 }, { - "epoch": 0.12, - "grad_norm": 1.4154376282640087, - "learning_rate": 9.77118975090669e-06, - "loss": 0.6116, + "epoch": 0.08, + "grad_norm": 1.7802564119135693, + "learning_rate": 9.92511034487176e-06, + "loss": 0.5759, "step": 1177 }, { - "epoch": 0.12, - "grad_norm": 2.378924743383222, - "learning_rate": 9.770679824096952e-06, - "loss": 0.6989, + "epoch": 0.08, + "grad_norm": 1.714082777908771, + "learning_rate": 9.924912065869875e-06, + "loss": 0.5611, "step": 1178 }, { - "epoch": 0.12, - "grad_norm": 2.7001805333163587, - "learning_rate": 9.770169343043423e-06, - "loss": 0.7121, + "epoch": 0.08, + "grad_norm": 1.928814180599799, + "learning_rate": 9.924713526717444e-06, + "loss": 0.5761, "step": 1179 }, { - "epoch": 0.12, - "grad_norm": 2.5402821969530742, - "learning_rate": 9.769658307805408e-06, - "loss": 0.7407, + "epoch": 0.08, + "grad_norm": 1.967951884404924, + "learning_rate": 9.924514727424959e-06, + "loss": 0.5947, "step": 1180 }, { - "epoch": 0.12, - "grad_norm": 2.4849409843596, - "learning_rate": 9.769146718442279e-06, - "loss": 0.7695, + "epoch": 0.08, + "grad_norm": 2.227795091230408, + "learning_rate": 9.924315668002919e-06, + "loss": 0.6123, "step": 1181 }, { - "epoch": 0.12, - "grad_norm": 2.36462157041008, - "learning_rate": 9.76863457501347e-06, - "loss": 0.7366, + "epoch": 0.08, + "grad_norm": 1.8413322674707158, + "learning_rate": 9.924116348461839e-06, + "loss": 0.5827, "step": 1182 }, { - "epoch": 0.12, - "grad_norm": 3.5569947068679664, - "learning_rate": 9.76812187757848e-06, - "loss": 0.7354, + "epoch": 0.08, + "grad_norm": 1.954965713683045, + "learning_rate": 9.92391676881225e-06, + "loss": 0.5482, "step": 1183 }, { - "epoch": 0.12, - "grad_norm": 2.230897787375336, - "learning_rate": 9.767608626196877e-06, - "loss": 0.6896, + "epoch": 0.08, + "grad_norm": 2.22211616620053, + "learning_rate": 9.92371692906469e-06, + "loss": 0.5996, "step": 1184 }, { - "epoch": 0.12, - "grad_norm": 2.322170991929863, - "learning_rate": 9.767094820928282e-06, - "loss": 0.7377, + "epoch": 0.08, + "grad_norm": 1.6982658882041028, + "learning_rate": 9.92351682922972e-06, + "loss": 0.566, "step": 1185 }, { - "epoch": 0.12, - "grad_norm": 4.766972909981728, - "learning_rate": 9.766580461832391e-06, - "loss": 0.7322, + "epoch": 0.08, + "grad_norm": 1.6806749351358607, + "learning_rate": 9.923316469317907e-06, + "loss": 0.5706, "step": 1186 }, { - "epoch": 0.12, - "grad_norm": 1.9936899282121772, - "learning_rate": 9.766065548968962e-06, - "loss": 0.7133, + "epoch": 0.08, + "grad_norm": 2.0701633949252516, + "learning_rate": 9.923115849339837e-06, + "loss": 0.5618, "step": 1187 }, { - "epoch": 0.13, - "grad_norm": 2.9245738454667074, - "learning_rate": 9.765550082397815e-06, - "loss": 0.7225, + "epoch": 0.08, + "grad_norm": 1.7105605748455022, + "learning_rate": 9.922914969306105e-06, + "loss": 0.636, "step": 1188 }, { - "epoch": 0.13, - "grad_norm": 2.9105025784741505, - "learning_rate": 9.765034062178836e-06, - "loss": 0.773, + "epoch": 0.08, + "grad_norm": 2.369548798387799, + "learning_rate": 9.922713829227323e-06, + "loss": 0.5742, "step": 1189 }, { - "epoch": 0.13, - "grad_norm": 2.3491105179808205, - "learning_rate": 9.764517488371971e-06, - "loss": 0.7345, + "epoch": 0.08, + "grad_norm": 1.6367537955058469, + "learning_rate": 9.922512429114118e-06, + "loss": 0.5198, "step": 1190 }, { - "epoch": 0.13, - "grad_norm": 3.042045767945915, - "learning_rate": 9.76400036103724e-06, - "loss": 0.6984, + "epoch": 0.08, + "grad_norm": 1.3541741691695732, + "learning_rate": 9.922310768977124e-06, + "loss": 0.4317, "step": 1191 }, { - "epoch": 0.13, - "grad_norm": 2.452386829371169, - "learning_rate": 9.763482680234718e-06, - "loss": 0.7039, + "epoch": 0.08, + "grad_norm": 1.885736070735759, + "learning_rate": 9.922108848826997e-06, + "loss": 0.5651, "step": 1192 }, { - "epoch": 0.13, - "grad_norm": 2.3004751526217735, - "learning_rate": 9.762964446024547e-06, - "loss": 0.7878, + "epoch": 0.08, + "grad_norm": 1.7935770349506805, + "learning_rate": 9.921906668674402e-06, + "loss": 0.6909, "step": 1193 }, { - "epoch": 0.13, - "grad_norm": 1.3235197535321637, - "learning_rate": 9.762445658466935e-06, - "loss": 0.6694, + "epoch": 0.08, + "grad_norm": 1.684495593029889, + "learning_rate": 9.92170422853002e-06, + "loss": 0.5933, "step": 1194 }, { - "epoch": 0.13, - "grad_norm": 2.539843716603838, - "learning_rate": 9.761926317622154e-06, - "loss": 0.741, + "epoch": 0.08, + "grad_norm": 1.6077915450346685, + "learning_rate": 9.921501528404544e-06, + "loss": 0.5379, "step": 1195 }, { - "epoch": 0.13, - "grad_norm": 2.849640472644429, - "learning_rate": 9.761406423550539e-06, - "loss": 0.7408, + "epoch": 0.08, + "grad_norm": 1.6820414805856694, + "learning_rate": 9.92129856830868e-06, + "loss": 0.4628, "step": 1196 }, { - "epoch": 0.13, - "grad_norm": 2.5166258673564514, - "learning_rate": 9.760885976312488e-06, - "loss": 0.7223, + "epoch": 0.08, + "grad_norm": 1.5906974631942323, + "learning_rate": 9.92109534825315e-06, + "loss": 0.5545, "step": 1197 }, { - "epoch": 0.13, - "grad_norm": 2.725239057841064, - "learning_rate": 9.760364975968469e-06, - "loss": 0.7007, + "epoch": 0.09, + "grad_norm": 2.216652218852187, + "learning_rate": 9.92089186824869e-06, + "loss": 0.652, "step": 1198 }, { - "epoch": 0.13, - "grad_norm": 6.7294366804059464, - "learning_rate": 9.759843422579005e-06, - "loss": 0.6748, + "epoch": 0.09, + "grad_norm": 1.950804019470793, + "learning_rate": 9.920688128306046e-06, + "loss": 0.5886, "step": 1199 }, { - "epoch": 0.13, - "grad_norm": 2.7738891360490694, - "learning_rate": 9.759321316204693e-06, - "loss": 0.6751, + "epoch": 0.09, + "grad_norm": 1.6993982162411818, + "learning_rate": 9.92048412843598e-06, + "loss": 0.5734, "step": 1200 }, { - "epoch": 0.13, - "grad_norm": 16.17668799017805, - "learning_rate": 9.758798656906187e-06, - "loss": 0.7039, + "epoch": 0.09, + "grad_norm": 1.8028462829338006, + "learning_rate": 9.920279868649272e-06, + "loss": 0.5803, "step": 1201 }, { - "epoch": 0.13, - "grad_norm": 2.8538609592861137, - "learning_rate": 9.758275444744211e-06, - "loss": 0.7457, + "epoch": 0.09, + "grad_norm": 1.9798459677195217, + "learning_rate": 9.920075348956709e-06, + "loss": 0.6292, "step": 1202 }, { - "epoch": 0.13, - "grad_norm": 2.8412177031201455, - "learning_rate": 9.757751679779549e-06, - "loss": 0.6996, + "epoch": 0.09, + "grad_norm": 2.571983882288428, + "learning_rate": 9.919870569369094e-06, + "loss": 0.5736, "step": 1203 }, { - "epoch": 0.13, - "grad_norm": 2.4794823846058103, - "learning_rate": 9.757227362073048e-06, - "loss": 0.7336, + "epoch": 0.09, + "grad_norm": 1.6066794568245326, + "learning_rate": 9.919665529897244e-06, + "loss": 0.5504, "step": 1204 }, { - "epoch": 0.13, - "grad_norm": 1.9919272034069202, - "learning_rate": 9.756702491685626e-06, - "loss": 0.7235, + "epoch": 0.09, + "grad_norm": 1.981904090340616, + "learning_rate": 9.919460230551991e-06, + "loss": 0.5939, "step": 1205 }, { - "epoch": 0.13, - "grad_norm": 2.682936702377847, - "learning_rate": 9.756177068678258e-06, - "loss": 0.6468, + "epoch": 0.09, + "grad_norm": 2.7284094091991133, + "learning_rate": 9.919254671344181e-06, + "loss": 0.5978, "step": 1206 }, { - "epoch": 0.13, - "grad_norm": 2.2979583262392227, - "learning_rate": 9.755651093111987e-06, - "loss": 0.7306, + "epoch": 0.09, + "grad_norm": 3.567729956266612, + "learning_rate": 9.919048852284667e-06, + "loss": 0.5488, "step": 1207 }, { - "epoch": 0.13, - "grad_norm": 5.631349641332311, - "learning_rate": 9.755124565047918e-06, - "loss": 0.7321, + "epoch": 0.09, + "grad_norm": 1.1410972838154188, + "learning_rate": 9.918842773384327e-06, + "loss": 0.4573, "step": 1208 }, { - "epoch": 0.13, - "grad_norm": 2.09817214920078, - "learning_rate": 9.754597484547223e-06, - "loss": 0.6638, + "epoch": 0.09, + "grad_norm": 1.8990024563537673, + "learning_rate": 9.918636434654046e-06, + "loss": 0.6171, "step": 1209 }, { - "epoch": 0.13, - "grad_norm": 2.909553694559456, - "learning_rate": 9.754069851671138e-06, - "loss": 0.7271, + "epoch": 0.09, + "grad_norm": 1.3866009957114651, + "learning_rate": 9.918429836104719e-06, + "loss": 0.5386, "step": 1210 }, { - "epoch": 0.13, - "grad_norm": 3.0625344333106828, - "learning_rate": 9.753541666480959e-06, - "loss": 0.6552, + "epoch": 0.09, + "grad_norm": 2.015041694448049, + "learning_rate": 9.918222977747263e-06, + "loss": 0.5682, "step": 1211 }, { - "epoch": 0.13, - "grad_norm": 3.2516687148488987, - "learning_rate": 9.75301292903805e-06, - "loss": 0.7763, + "epoch": 0.09, + "grad_norm": 1.830201846485847, + "learning_rate": 9.918015859592603e-06, + "loss": 0.64, "step": 1212 }, { - "epoch": 0.13, - "grad_norm": 3.0525856091150914, - "learning_rate": 9.752483639403839e-06, - "loss": 0.7237, + "epoch": 0.09, + "grad_norm": 1.6551911666505976, + "learning_rate": 9.917808481651682e-06, + "loss": 0.6585, "step": 1213 }, { - "epoch": 0.13, - "grad_norm": 1.3517703119170414, - "learning_rate": 9.751953797639817e-06, - "loss": 0.6346, + "epoch": 0.09, + "grad_norm": 2.0116896322533435, + "learning_rate": 9.917600843935453e-06, + "loss": 0.5673, "step": 1214 }, { - "epoch": 0.13, - "grad_norm": 2.5336050253735287, - "learning_rate": 9.751423403807539e-06, - "loss": 0.7032, + "epoch": 0.09, + "grad_norm": 2.255024758048306, + "learning_rate": 9.917392946454885e-06, + "loss": 0.6215, "step": 1215 }, { - "epoch": 0.13, - "grad_norm": 2.274623839914132, - "learning_rate": 9.750892457968626e-06, - "loss": 0.7113, + "epoch": 0.09, + "grad_norm": 2.07188563069938, + "learning_rate": 9.91718478922096e-06, + "loss": 0.6746, "step": 1216 }, { - "epoch": 0.13, - "grad_norm": 2.502789805441234, - "learning_rate": 9.75036096018476e-06, - "loss": 0.6948, + "epoch": 0.09, + "grad_norm": 1.6651976778973394, + "learning_rate": 9.916976372244671e-06, + "loss": 0.6075, "step": 1217 }, { - "epoch": 0.13, - "grad_norm": 2.362716780752687, - "learning_rate": 9.749828910517688e-06, - "loss": 0.6797, + "epoch": 0.09, + "grad_norm": 1.0984520744620285, + "learning_rate": 9.916767695537028e-06, + "loss": 0.4631, "step": 1218 }, { - "epoch": 0.13, - "grad_norm": 1.9013855117687954, - "learning_rate": 9.749296309029224e-06, - "loss": 0.7267, + "epoch": 0.09, + "grad_norm": 1.7370466259576671, + "learning_rate": 9.916558759109058e-06, + "loss": 0.6415, "step": 1219 }, { - "epoch": 0.13, - "grad_norm": 2.5275074129127404, - "learning_rate": 9.748763155781244e-06, - "loss": 0.719, + "epoch": 0.09, + "grad_norm": 2.759782095316019, + "learning_rate": 9.916349562971793e-06, + "loss": 0.6117, "step": 1220 }, { - "epoch": 0.13, - "grad_norm": 3.7639603698409827, - "learning_rate": 9.748229450835689e-06, - "loss": 0.6755, + "epoch": 0.09, + "grad_norm": 1.7408920182263712, + "learning_rate": 9.916140107136286e-06, + "loss": 0.5608, "step": 1221 }, { - "epoch": 0.13, - "grad_norm": 2.8030238737546624, - "learning_rate": 9.747695194254561e-06, - "loss": 0.7427, + "epoch": 0.09, + "grad_norm": 0.9759958349596218, + "learning_rate": 9.9159303916136e-06, + "loss": 0.46, "step": 1222 }, { - "epoch": 0.13, - "grad_norm": 2.820939766808103, - "learning_rate": 9.74716038609993e-06, - "loss": 0.7745, + "epoch": 0.09, + "grad_norm": 1.7995879385908335, + "learning_rate": 9.91572041641481e-06, + "loss": 0.6024, "step": 1223 }, { - "epoch": 0.13, - "grad_norm": 2.48939064756324, - "learning_rate": 9.746625026433929e-06, - "loss": 0.6752, + "epoch": 0.09, + "grad_norm": 1.7615528811689827, + "learning_rate": 9.915510181551015e-06, + "loss": 0.556, "step": 1224 }, { - "epoch": 0.13, - "grad_norm": 2.7729898492722955, - "learning_rate": 9.746089115318751e-06, - "loss": 0.6633, + "epoch": 0.09, + "grad_norm": 2.333570123917567, + "learning_rate": 9.915299687033315e-06, + "loss": 0.6071, "step": 1225 }, { - "epoch": 0.13, - "grad_norm": 2.5642579652047615, - "learning_rate": 9.745552652816662e-06, - "loss": 0.6911, + "epoch": 0.09, + "grad_norm": 1.949082528809982, + "learning_rate": 9.915088932872831e-06, + "loss": 0.6253, "step": 1226 }, { - "epoch": 0.13, - "grad_norm": 2.479378462117706, - "learning_rate": 9.74501563898998e-06, - "loss": 0.6961, + "epoch": 0.09, + "grad_norm": 1.8360940132115062, + "learning_rate": 9.914877919080695e-06, + "loss": 0.5912, "step": 1227 }, { - "epoch": 0.13, - "grad_norm": 3.517261027107456, - "learning_rate": 9.744478073901102e-06, - "loss": 0.6875, + "epoch": 0.09, + "grad_norm": 1.750614397700351, + "learning_rate": 9.914666645668052e-06, + "loss": 0.6334, "step": 1228 }, { - "epoch": 0.13, - "grad_norm": 4.143063814038163, - "learning_rate": 9.743939957612473e-06, - "loss": 0.7862, + "epoch": 0.09, + "grad_norm": 1.6776782891901605, + "learning_rate": 9.914455112646065e-06, + "loss": 0.6117, "step": 1229 }, { - "epoch": 0.13, - "grad_norm": 2.186066134453419, - "learning_rate": 9.743401290186615e-06, - "loss": 0.6905, + "epoch": 0.09, + "grad_norm": 1.7194721920488816, + "learning_rate": 9.914243320025905e-06, + "loss": 0.6016, "step": 1230 }, { - "epoch": 0.13, - "grad_norm": 2.4988157638795383, - "learning_rate": 9.742862071686105e-06, - "loss": 0.6065, + "epoch": 0.09, + "grad_norm": 1.6936774213108488, + "learning_rate": 9.914031267818762e-06, + "loss": 0.6329, "step": 1231 }, { - "epoch": 0.13, - "grad_norm": 2.944980331009129, - "learning_rate": 9.742322302173591e-06, - "loss": 0.8424, + "epoch": 0.09, + "grad_norm": 1.846935203238646, + "learning_rate": 9.913818956035836e-06, + "loss": 0.5812, "step": 1232 }, { - "epoch": 0.13, - "grad_norm": 2.4840746884722997, - "learning_rate": 9.74178198171178e-06, - "loss": 0.7242, + "epoch": 0.09, + "grad_norm": 2.062047162358173, + "learning_rate": 9.913606384688344e-06, + "loss": 0.6091, "step": 1233 }, { - "epoch": 0.13, - "grad_norm": 2.3564467576176615, - "learning_rate": 9.741241110363446e-06, - "loss": 0.5671, + "epoch": 0.09, + "grad_norm": 1.8408822602926813, + "learning_rate": 9.913393553787512e-06, + "loss": 0.5937, "step": 1234 }, { - "epoch": 0.13, - "grad_norm": 2.5428398872084914, - "learning_rate": 9.740699688191426e-06, - "loss": 0.7558, + "epoch": 0.09, + "grad_norm": 2.037736879102753, + "learning_rate": 9.913180463344583e-06, + "loss": 0.5851, "step": 1235 }, { - "epoch": 0.13, - "grad_norm": 2.987401358325348, - "learning_rate": 9.74015771525862e-06, - "loss": 0.7976, + "epoch": 0.09, + "grad_norm": 1.8710903522498417, + "learning_rate": 9.912967113370814e-06, + "loss": 0.5807, "step": 1236 }, { - "epoch": 0.13, - "grad_norm": 2.288527677436737, - "learning_rate": 9.73961519162799e-06, - "loss": 0.7495, + "epoch": 0.09, + "grad_norm": 2.017638473146537, + "learning_rate": 9.912753503877475e-06, + "loss": 0.596, "step": 1237 }, { - "epoch": 0.13, - "grad_norm": 3.203656023458977, - "learning_rate": 9.739072117362572e-06, - "loss": 0.6546, + "epoch": 0.09, + "grad_norm": 1.6605641861866103, + "learning_rate": 9.912539634875851e-06, + "loss": 0.5811, "step": 1238 }, { - "epoch": 0.13, - "grad_norm": 2.893448106608211, - "learning_rate": 9.738528492525454e-06, - "loss": 0.7536, + "epoch": 0.09, + "grad_norm": 2.2397582130518354, + "learning_rate": 9.912325506377236e-06, + "loss": 0.5626, "step": 1239 }, { - "epoch": 0.13, - "grad_norm": 2.562798821263755, - "learning_rate": 9.737984317179792e-06, - "loss": 0.6901, + "epoch": 0.09, + "grad_norm": 1.887085337247806, + "learning_rate": 9.912111118392942e-06, + "loss": 0.6438, "step": 1240 }, { - "epoch": 0.13, - "grad_norm": 2.525831897954803, - "learning_rate": 9.737439591388808e-06, - "loss": 0.7072, + "epoch": 0.09, + "grad_norm": 2.003306966785893, + "learning_rate": 9.911896470934294e-06, + "loss": 0.6206, "step": 1241 }, { - "epoch": 0.13, - "grad_norm": 2.390376616064533, - "learning_rate": 9.736894315215788e-06, - "loss": 0.6709, + "epoch": 0.09, + "grad_norm": 1.6606324803064856, + "learning_rate": 9.911681564012633e-06, + "loss": 0.5664, "step": 1242 }, { - "epoch": 0.13, - "grad_norm": 2.290595026707291, - "learning_rate": 9.736348488724078e-06, - "loss": 0.7022, + "epoch": 0.09, + "grad_norm": 1.9489965498868007, + "learning_rate": 9.911466397639306e-06, + "loss": 0.5833, "step": 1243 }, { - "epoch": 0.13, - "grad_norm": 3.0135726421842928, - "learning_rate": 9.735802111977093e-06, - "loss": 0.7398, + "epoch": 0.09, + "grad_norm": 2.1072711104629858, + "learning_rate": 9.911250971825682e-06, + "loss": 0.5593, "step": 1244 }, { - "epoch": 0.13, - "grad_norm": 6.4970745285376745, - "learning_rate": 9.735255185038308e-06, - "loss": 0.7238, + "epoch": 0.09, + "grad_norm": 1.8153025619281533, + "learning_rate": 9.911035286583141e-06, + "loss": 0.6293, "step": 1245 }, { - "epoch": 0.13, - "grad_norm": 2.5294324112992945, - "learning_rate": 9.734707707971265e-06, - "loss": 0.6654, + "epoch": 0.09, + "grad_norm": 1.615777170245443, + "learning_rate": 9.910819341923074e-06, + "loss": 0.5803, "step": 1246 }, { - "epoch": 0.13, - "grad_norm": 4.769086981727464, - "learning_rate": 9.734159680839566e-06, - "loss": 0.7625, + "epoch": 0.09, + "grad_norm": 1.6433745346455484, + "learning_rate": 9.910603137856892e-06, + "loss": 0.5881, "step": 1247 }, { - "epoch": 0.13, - "grad_norm": 2.425192205003877, - "learning_rate": 9.733611103706882e-06, - "loss": 0.7288, + "epoch": 0.09, + "grad_norm": 1.788186080786205, + "learning_rate": 9.91038667439601e-06, + "loss": 0.5882, "step": 1248 }, { - "epoch": 0.13, - "grad_norm": 2.4432588819312016, - "learning_rate": 9.73306197663694e-06, - "loss": 0.7453, + "epoch": 0.09, + "grad_norm": 1.7964366481156733, + "learning_rate": 9.910169951551867e-06, + "loss": 0.6109, "step": 1249 }, { - "epoch": 0.13, - "grad_norm": 2.4916017795323224, - "learning_rate": 9.732512299693542e-06, - "loss": 0.6687, + "epoch": 0.09, + "grad_norm": 1.4686137498066427, + "learning_rate": 9.909952969335908e-06, + "loss": 0.5142, "step": 1250 }, { - "epoch": 0.13, - "grad_norm": 6.171083266500421, - "learning_rate": 9.731962072940545e-06, - "loss": 0.7657, + "epoch": 0.09, + "grad_norm": 1.9730179384567703, + "learning_rate": 9.909735727759597e-06, + "loss": 0.5083, "step": 1251 }, { - "epoch": 0.13, - "grad_norm": 2.2828106585662606, - "learning_rate": 9.731411296441873e-06, - "loss": 0.7871, + "epoch": 0.09, + "grad_norm": 1.747316190622302, + "learning_rate": 9.909518226834407e-06, + "loss": 0.6054, "step": 1252 }, { - "epoch": 0.13, - "grad_norm": 6.2796867996521595, - "learning_rate": 9.730859970261514e-06, - "loss": 0.7616, + "epoch": 0.09, + "grad_norm": 1.7546645665742489, + "learning_rate": 9.90930046657183e-06, + "loss": 0.6305, "step": 1253 }, { - "epoch": 0.13, - "grad_norm": 2.9365596197667454, - "learning_rate": 9.730308094463519e-06, - "loss": 0.7857, + "epoch": 0.09, + "grad_norm": 1.8919168767792636, + "learning_rate": 9.909082446983366e-06, + "loss": 0.5916, "step": 1254 }, { - "epoch": 0.13, - "grad_norm": 2.571904631777549, - "learning_rate": 9.729755669112003e-06, - "loss": 0.7116, + "epoch": 0.09, + "grad_norm": 2.081472423914304, + "learning_rate": 9.908864168080535e-06, + "loss": 0.5986, "step": 1255 }, { - "epoch": 0.13, - "grad_norm": 2.5467525821131054, - "learning_rate": 9.729202694271145e-06, - "loss": 0.6199, + "epoch": 0.09, + "grad_norm": 0.9349120666792797, + "learning_rate": 9.908645629874864e-06, + "loss": 0.4466, "step": 1256 }, { - "epoch": 0.13, - "grad_norm": 2.2962244446782196, - "learning_rate": 9.72864917000519e-06, - "loss": 0.758, + "epoch": 0.09, + "grad_norm": 1.0599331890168222, + "learning_rate": 9.908426832377898e-06, + "loss": 0.481, "step": 1257 }, { - "epoch": 0.13, - "grad_norm": 3.140094567957693, - "learning_rate": 9.728095096378443e-06, - "loss": 0.7457, + "epoch": 0.09, + "grad_norm": 2.2925254598846565, + "learning_rate": 9.908207775601194e-06, + "loss": 0.6263, "step": 1258 }, { - "epoch": 0.13, - "grad_norm": 2.4307560819993066, - "learning_rate": 9.727540473455277e-06, - "loss": 0.7374, + "epoch": 0.09, + "grad_norm": 1.6594436051462882, + "learning_rate": 9.907988459556324e-06, + "loss": 0.5469, "step": 1259 }, { - "epoch": 0.13, - "grad_norm": 3.4574501390436145, - "learning_rate": 9.726985301300122e-06, - "loss": 0.7201, + "epoch": 0.09, + "grad_norm": 2.480169998601102, + "learning_rate": 9.907768884254874e-06, + "loss": 0.6157, "step": 1260 }, { - "epoch": 0.13, - "grad_norm": 2.5770663455788885, - "learning_rate": 9.72642957997748e-06, - "loss": 0.6912, + "epoch": 0.09, + "grad_norm": 3.1718315172992004, + "learning_rate": 9.907549049708442e-06, + "loss": 0.5684, "step": 1261 }, { - "epoch": 0.13, - "grad_norm": 2.6051783139637252, - "learning_rate": 9.725873309551915e-06, - "loss": 0.7906, + "epoch": 0.09, + "grad_norm": 1.7429427329173932, + "learning_rate": 9.907328955928639e-06, + "loss": 0.5549, "step": 1262 }, { - "epoch": 0.13, - "grad_norm": 2.6247042237572757, - "learning_rate": 9.725316490088046e-06, - "loss": 0.7596, + "epoch": 0.09, + "grad_norm": 1.7503000368204438, + "learning_rate": 9.907108602927092e-06, + "loss": 0.6309, "step": 1263 }, { - "epoch": 0.13, - "grad_norm": 2.208978552737289, - "learning_rate": 9.724759121650569e-06, - "loss": 0.6893, + "epoch": 0.09, + "grad_norm": 1.9780421908216177, + "learning_rate": 9.906887990715442e-06, + "loss": 0.5322, "step": 1264 }, { - "epoch": 0.13, - "grad_norm": 2.818237537685943, - "learning_rate": 9.724201204304234e-06, - "loss": 0.6509, + "epoch": 0.09, + "grad_norm": 1.6849970699653378, + "learning_rate": 9.906667119305342e-06, + "loss": 0.5814, "step": 1265 }, { - "epoch": 0.13, - "grad_norm": 3.516551806674273, - "learning_rate": 9.72364273811386e-06, - "loss": 0.8091, + "epoch": 0.09, + "grad_norm": 1.6880254798309657, + "learning_rate": 9.906445988708458e-06, + "loss": 0.5648, "step": 1266 }, { - "epoch": 0.13, - "grad_norm": 2.8775423326241003, - "learning_rate": 9.723083723144326e-06, - "loss": 0.6664, + "epoch": 0.09, + "grad_norm": 1.2281946009392568, + "learning_rate": 9.906224598936471e-06, + "loss": 0.4574, "step": 1267 }, { - "epoch": 0.13, - "grad_norm": 3.1104989369101292, - "learning_rate": 9.722524159460579e-06, - "loss": 0.7484, + "epoch": 0.09, + "grad_norm": 2.5832065826218846, + "learning_rate": 9.906002950001076e-06, + "loss": 0.5781, "step": 1268 }, { - "epoch": 0.13, - "grad_norm": 2.284433621967399, - "learning_rate": 9.721964047127627e-06, - "loss": 0.7694, + "epoch": 0.09, + "grad_norm": 1.8006861456662047, + "learning_rate": 9.905781041913983e-06, + "loss": 0.6056, "step": 1269 }, { - "epoch": 0.13, - "grad_norm": 3.528762803348098, - "learning_rate": 9.721403386210542e-06, - "loss": 0.7047, + "epoch": 0.09, + "grad_norm": 1.8845317379449849, + "learning_rate": 9.905558874686912e-06, + "loss": 0.5648, "step": 1270 }, { - "epoch": 0.13, - "grad_norm": 2.3698967383567053, - "learning_rate": 9.720842176774458e-06, - "loss": 0.7303, + "epoch": 0.09, + "grad_norm": 1.797843254627934, + "learning_rate": 9.905336448331598e-06, + "loss": 0.5559, "step": 1271 }, { - "epoch": 0.13, - "grad_norm": 2.2862180267688195, - "learning_rate": 9.720280418884578e-06, - "loss": 0.6719, + "epoch": 0.09, + "grad_norm": 1.7881199631167275, + "learning_rate": 9.905113762859792e-06, + "loss": 0.5523, "step": 1272 }, { - "epoch": 0.13, - "grad_norm": 2.585341957705507, - "learning_rate": 9.719718112606163e-06, - "loss": 0.6893, + "epoch": 0.09, + "grad_norm": 0.8603688583328809, + "learning_rate": 9.904890818283255e-06, + "loss": 0.4942, "step": 1273 }, { - "epoch": 0.13, - "grad_norm": 2.931292145704906, - "learning_rate": 9.719155258004542e-06, - "loss": 0.7129, + "epoch": 0.09, + "grad_norm": 1.6406935850055777, + "learning_rate": 9.904667614613766e-06, + "loss": 0.5088, "step": 1274 }, { - "epoch": 0.13, - "grad_norm": 2.930050695251463, - "learning_rate": 9.718591855145105e-06, - "loss": 0.7024, + "epoch": 0.09, + "grad_norm": 1.7478706271146758, + "learning_rate": 9.904444151863116e-06, + "loss": 0.6329, "step": 1275 }, { - "epoch": 0.13, - "grad_norm": 2.3725890130265377, - "learning_rate": 9.718027904093306e-06, - "loss": 0.6779, + "epoch": 0.09, + "grad_norm": 2.0602155586791855, + "learning_rate": 9.904220430043104e-06, + "loss": 0.6554, "step": 1276 }, { - "epoch": 0.13, - "grad_norm": 2.667713730201913, - "learning_rate": 9.717463404914661e-06, - "loss": 0.7128, + "epoch": 0.09, + "grad_norm": 2.2906190483613673, + "learning_rate": 9.903996449165552e-06, + "loss": 0.6348, "step": 1277 }, { - "epoch": 0.13, - "grad_norm": 2.4476996596913376, - "learning_rate": 9.716898357674757e-06, - "loss": 0.725, + "epoch": 0.09, + "grad_norm": 1.7824152376294604, + "learning_rate": 9.903772209242291e-06, + "loss": 0.5867, "step": 1278 }, { - "epoch": 0.13, - "grad_norm": 2.286403952573892, - "learning_rate": 9.716332762439238e-06, - "loss": 0.6882, + "epoch": 0.09, + "grad_norm": 1.831351594269644, + "learning_rate": 9.903547710285166e-06, + "loss": 0.5791, "step": 1279 }, { - "epoch": 0.13, - "grad_norm": 2.187379321806065, - "learning_rate": 9.71576661927381e-06, - "loss": 0.6514, + "epoch": 0.09, + "grad_norm": 2.398692158589471, + "learning_rate": 9.903322952306036e-06, + "loss": 0.591, "step": 1280 }, { - "epoch": 0.13, - "grad_norm": 8.048461731248665, - "learning_rate": 9.71519992824425e-06, - "loss": 0.69, + "epoch": 0.09, + "grad_norm": 1.8724691409790362, + "learning_rate": 9.903097935316772e-06, + "loss": 0.6094, "step": 1281 }, { - "epoch": 0.13, - "grad_norm": 2.4471121642879634, - "learning_rate": 9.714632689416392e-06, - "loss": 0.6853, + "epoch": 0.09, + "grad_norm": 3.384772094190995, + "learning_rate": 9.902872659329262e-06, + "loss": 0.638, "step": 1282 }, { - "epoch": 0.14, - "grad_norm": 2.3522287088330436, - "learning_rate": 9.714064902856136e-06, - "loss": 0.6857, + "epoch": 0.09, + "grad_norm": 1.7866847027448565, + "learning_rate": 9.902647124355403e-06, + "loss": 0.5583, "step": 1283 }, { - "epoch": 0.14, - "grad_norm": 2.8039331155505693, - "learning_rate": 9.713496568629447e-06, - "loss": 0.7953, + "epoch": 0.09, + "grad_norm": 1.6752402310169443, + "learning_rate": 9.902421330407113e-06, + "loss": 0.6332, "step": 1284 }, { - "epoch": 0.14, - "grad_norm": 2.5431568191768257, - "learning_rate": 9.71292768680235e-06, - "loss": 0.6906, + "epoch": 0.09, + "grad_norm": 2.239625640946203, + "learning_rate": 9.902195277496314e-06, + "loss": 0.5676, "step": 1285 }, { - "epoch": 0.14, - "grad_norm": 2.734010132334214, - "learning_rate": 9.712358257440942e-06, - "loss": 0.7469, + "epoch": 0.09, + "grad_norm": 1.8022405602708378, + "learning_rate": 9.901968965634951e-06, + "loss": 0.6221, "step": 1286 }, { - "epoch": 0.14, - "grad_norm": 2.766837316455564, - "learning_rate": 9.711788280611371e-06, - "loss": 0.7425, + "epoch": 0.09, + "grad_norm": 1.9479164162662685, + "learning_rate": 9.901742394834978e-06, + "loss": 0.6654, "step": 1287 }, { - "epoch": 0.14, - "grad_norm": 2.2607094221443815, - "learning_rate": 9.711217756379859e-06, - "loss": 0.6745, + "epoch": 0.09, + "grad_norm": 1.6437373470952514, + "learning_rate": 9.901515565108362e-06, + "loss": 0.6696, "step": 1288 }, { - "epoch": 0.14, - "grad_norm": 2.718072945572998, - "learning_rate": 9.710646684812686e-06, - "loss": 0.6632, + "epoch": 0.09, + "grad_norm": 1.649059219926303, + "learning_rate": 9.901288476467083e-06, + "loss": 0.5872, "step": 1289 }, { - "epoch": 0.14, - "grad_norm": 2.220340692493552, - "learning_rate": 9.7100750659762e-06, - "loss": 0.7289, + "epoch": 0.09, + "grad_norm": 1.7142790572321507, + "learning_rate": 9.90106112892314e-06, + "loss": 0.5768, "step": 1290 }, { - "epoch": 0.14, - "grad_norm": 3.630360305784288, - "learning_rate": 9.709502899936805e-06, - "loss": 0.6569, + "epoch": 0.09, + "grad_norm": 1.8303934529150983, + "learning_rate": 9.900833522488542e-06, + "loss": 0.5599, "step": 1291 }, { - "epoch": 0.14, - "grad_norm": 2.7311986179292194, - "learning_rate": 9.70893018676098e-06, - "loss": 0.7763, + "epoch": 0.09, + "grad_norm": 1.663633003491383, + "learning_rate": 9.900605657175311e-06, + "loss": 0.5301, "step": 1292 }, { - "epoch": 0.14, - "grad_norm": 2.292026620226449, - "learning_rate": 9.708356926515256e-06, - "loss": 0.7046, + "epoch": 0.09, + "grad_norm": 1.5649316109898668, + "learning_rate": 9.900377532995483e-06, + "loss": 0.5806, "step": 1293 }, { - "epoch": 0.14, - "grad_norm": 3.099523665954287, - "learning_rate": 9.707783119266236e-06, - "loss": 0.7467, + "epoch": 0.09, + "grad_norm": 1.6451674648531116, + "learning_rate": 9.90014914996111e-06, + "loss": 0.4933, "step": 1294 }, { - "epoch": 0.14, - "grad_norm": 2.9843246706602047, - "learning_rate": 9.707208765080583e-06, - "loss": 0.7871, + "epoch": 0.09, + "grad_norm": 1.1087209807515346, + "learning_rate": 9.899920508084253e-06, + "loss": 0.4641, "step": 1295 }, { - "epoch": 0.14, - "grad_norm": 2.603495110130337, - "learning_rate": 9.706633864025021e-06, - "loss": 0.6149, + "epoch": 0.09, + "grad_norm": 1.766917043451123, + "learning_rate": 9.899691607376994e-06, + "loss": 0.6108, "step": 1296 }, { - "epoch": 0.14, - "grad_norm": 2.9957523727453244, - "learning_rate": 9.706058416166342e-06, - "loss": 0.6866, + "epoch": 0.09, + "grad_norm": 1.7580224903077726, + "learning_rate": 9.89946244785142e-06, + "loss": 0.5794, "step": 1297 }, { - "epoch": 0.14, - "grad_norm": 3.4581527501195923, - "learning_rate": 9.705482421571401e-06, - "loss": 0.7306, + "epoch": 0.09, + "grad_norm": 1.9270486022285491, + "learning_rate": 9.899233029519639e-06, + "loss": 0.6296, "step": 1298 }, { - "epoch": 0.14, - "grad_norm": 2.75978405172835, - "learning_rate": 9.704905880307113e-06, - "loss": 0.704, + "epoch": 0.09, + "grad_norm": 2.230307602772785, + "learning_rate": 9.899003352393768e-06, + "loss": 0.6009, "step": 1299 }, { - "epoch": 0.14, - "grad_norm": 2.6219298892401675, - "learning_rate": 9.704328792440462e-06, - "loss": 0.7072, + "epoch": 0.09, + "grad_norm": 1.7316856634087157, + "learning_rate": 9.898773416485938e-06, + "loss": 0.5994, "step": 1300 }, { - "epoch": 0.14, - "grad_norm": 2.1583848744671967, - "learning_rate": 9.70375115803849e-06, - "loss": 0.6726, + "epoch": 0.09, + "grad_norm": 1.881291648098643, + "learning_rate": 9.898543221808299e-06, + "loss": 0.5823, "step": 1301 }, { - "epoch": 0.14, - "grad_norm": 2.7991267040996943, - "learning_rate": 9.703172977168307e-06, - "loss": 0.6575, + "epoch": 0.09, + "grad_norm": 2.0794535716666958, + "learning_rate": 9.898312768373008e-06, + "loss": 0.5832, "step": 1302 }, { - "epoch": 0.14, - "grad_norm": 9.725583520285742, - "learning_rate": 9.702594249897082e-06, - "loss": 0.735, + "epoch": 0.09, + "grad_norm": 1.860363783135927, + "learning_rate": 9.89808205619224e-06, + "loss": 0.5367, "step": 1303 }, { - "epoch": 0.14, - "grad_norm": 3.2733331053327124, - "learning_rate": 9.70201497629205e-06, - "loss": 0.7678, + "epoch": 0.09, + "grad_norm": 4.7691833475780125, + "learning_rate": 9.89785108527818e-06, + "loss": 0.544, "step": 1304 }, { - "epoch": 0.14, - "grad_norm": 3.3225063544881346, - "learning_rate": 9.701435156420511e-06, - "loss": 0.6528, + "epoch": 0.09, + "grad_norm": 1.685349383175745, + "learning_rate": 9.897619855643029e-06, + "loss": 0.5751, "step": 1305 }, { - "epoch": 0.14, - "grad_norm": 2.5695618934438342, - "learning_rate": 9.700854790349826e-06, - "loss": 0.7385, + "epoch": 0.09, + "grad_norm": 1.5000550718409968, + "learning_rate": 9.897388367299002e-06, + "loss": 0.5574, "step": 1306 }, { - "epoch": 0.14, - "grad_norm": 2.165748397571203, - "learning_rate": 9.700273878147419e-06, - "loss": 0.6923, + "epoch": 0.09, + "grad_norm": 1.772086774182155, + "learning_rate": 9.897156620258326e-06, + "loss": 0.53, "step": 1307 }, { - "epoch": 0.14, - "grad_norm": 3.946583806133681, - "learning_rate": 9.699692419880782e-06, - "loss": 0.6818, + "epoch": 0.09, + "grad_norm": 2.195287195478925, + "learning_rate": 9.896924614533246e-06, + "loss": 0.6622, "step": 1308 }, { - "epoch": 0.14, - "grad_norm": 2.7377235660563417, - "learning_rate": 9.699110415617464e-06, - "loss": 0.6344, + "epoch": 0.09, + "grad_norm": 1.6815398538194033, + "learning_rate": 9.896692350136013e-06, + "loss": 0.5351, "step": 1309 }, { - "epoch": 0.14, - "grad_norm": 4.216540668421752, - "learning_rate": 9.698527865425083e-06, - "loss": 0.7325, + "epoch": 0.09, + "grad_norm": 2.3943270254285483, + "learning_rate": 9.896459827078896e-06, + "loss": 0.6193, "step": 1310 }, { - "epoch": 0.14, - "grad_norm": 2.5772415638304755, - "learning_rate": 9.697944769371315e-06, - "loss": 0.6738, - "step": 1311 + "epoch": 0.09, + "grad_norm": 1.5310504935398441, + "learning_rate": 9.896227045374182e-06, + "loss": 0.5428, + "step": 1311 }, { - "epoch": 0.14, - "grad_norm": 3.4296412096784854, - "learning_rate": 9.697361127523905e-06, - "loss": 0.7011, + "epoch": 0.09, + "grad_norm": 1.9445873415145851, + "learning_rate": 9.895994005034164e-06, + "loss": 0.5945, "step": 1312 }, { - "epoch": 0.14, - "grad_norm": 3.0442601673343894, - "learning_rate": 9.696776939950657e-06, - "loss": 0.7084, + "epoch": 0.09, + "grad_norm": 1.6569725662881276, + "learning_rate": 9.895760706071154e-06, + "loss": 0.5532, "step": 1313 }, { - "epoch": 0.14, - "grad_norm": 3.075407781881538, - "learning_rate": 9.696192206719441e-06, - "loss": 0.7175, + "epoch": 0.09, + "grad_norm": 1.6969636872927831, + "learning_rate": 9.895527148497472e-06, + "loss": 0.5846, "step": 1314 }, { - "epoch": 0.14, - "grad_norm": 2.190682088347276, - "learning_rate": 9.69560692789819e-06, - "loss": 0.718, + "epoch": 0.09, + "grad_norm": 2.0862062095764475, + "learning_rate": 9.89529333232546e-06, + "loss": 0.6017, "step": 1315 }, { - "epoch": 0.14, - "grad_norm": 3.203702468047881, - "learning_rate": 9.695021103554901e-06, - "loss": 0.78, + "epoch": 0.09, + "grad_norm": 2.8323648036871965, + "learning_rate": 9.895059257567465e-06, + "loss": 0.6146, "step": 1316 }, { - "epoch": 0.14, - "grad_norm": 2.334461505397333, - "learning_rate": 9.694434733757632e-06, - "loss": 0.7062, + "epoch": 0.09, + "grad_norm": 2.0410247440072546, + "learning_rate": 9.894824924235855e-06, + "loss": 0.5383, "step": 1317 }, { - "epoch": 0.14, - "grad_norm": 2.3787385280317968, - "learning_rate": 9.693847818574504e-06, - "loss": 0.6883, + "epoch": 0.09, + "grad_norm": 1.9861623968625974, + "learning_rate": 9.894590332343005e-06, + "loss": 0.5667, "step": 1318 }, { - "epoch": 0.14, - "grad_norm": 1.7061551304241709, - "learning_rate": 9.693260358073707e-06, - "loss": 0.7412, + "epoch": 0.09, + "grad_norm": 2.0216831015068206, + "learning_rate": 9.89435548190131e-06, + "loss": 0.5808, "step": 1319 }, { - "epoch": 0.14, - "grad_norm": 3.5189339975333427, - "learning_rate": 9.692672352323486e-06, - "loss": 0.7493, + "epoch": 0.09, + "grad_norm": 0.9788110840017549, + "learning_rate": 9.894120372923172e-06, + "loss": 0.4723, "step": 1320 }, { - "epoch": 0.14, - "grad_norm": 2.718227304089354, - "learning_rate": 9.692083801392158e-06, - "loss": 0.7298, + "epoch": 0.09, + "grad_norm": 2.1235272219530383, + "learning_rate": 9.893885005421015e-06, + "loss": 0.5047, "step": 1321 }, { - "epoch": 0.14, - "grad_norm": 5.863146583225161, - "learning_rate": 9.691494705348097e-06, - "loss": 0.685, + "epoch": 0.09, + "grad_norm": 1.6779220654572204, + "learning_rate": 9.893649379407269e-06, + "loss": 0.5952, "step": 1322 }, { - "epoch": 0.14, - "grad_norm": 2.224062335592531, - "learning_rate": 9.690905064259744e-06, - "loss": 0.7131, + "epoch": 0.09, + "grad_norm": 1.776932945690128, + "learning_rate": 9.893413494894378e-06, + "loss": 0.6328, "step": 1323 }, { - "epoch": 0.14, - "grad_norm": 2.3482124963584043, - "learning_rate": 9.690314878195599e-06, - "loss": 0.7642, + "epoch": 0.09, + "grad_norm": 1.839696484408332, + "learning_rate": 9.893177351894807e-06, + "loss": 0.5846, "step": 1324 }, { - "epoch": 0.14, - "grad_norm": 2.802943387618265, - "learning_rate": 9.68972414722423e-06, - "loss": 0.7402, + "epoch": 0.09, + "grad_norm": 1.5927340655025057, + "learning_rate": 9.892940950421029e-06, + "loss": 0.5873, "step": 1325 }, { - "epoch": 0.14, - "grad_norm": 6.439325155682455, - "learning_rate": 9.689132871414266e-06, - "loss": 0.6827, + "epoch": 0.09, + "grad_norm": 1.6463108746052195, + "learning_rate": 9.892704290485528e-06, + "loss": 0.5417, "step": 1326 }, { - "epoch": 0.14, - "grad_norm": 2.283544545333155, - "learning_rate": 9.688541050834402e-06, - "loss": 0.7649, + "epoch": 0.09, + "grad_norm": 2.398331314673184, + "learning_rate": 9.892467372100808e-06, + "loss": 0.5912, "step": 1327 }, { - "epoch": 0.14, - "grad_norm": 2.8881726166539816, - "learning_rate": 9.68794868555339e-06, - "loss": 0.7581, + "epoch": 0.09, + "grad_norm": 1.6800396101970716, + "learning_rate": 9.892230195279386e-06, + "loss": 0.5938, "step": 1328 }, { - "epoch": 0.14, - "grad_norm": 2.5386184576990725, - "learning_rate": 9.687355775640052e-06, - "loss": 0.7252, + "epoch": 0.09, + "grad_norm": 1.8939746797389594, + "learning_rate": 9.891992760033786e-06, + "loss": 0.6346, "step": 1329 }, { - "epoch": 0.14, - "grad_norm": 2.6298695027488703, - "learning_rate": 9.68676232116327e-06, - "loss": 0.7742, + "epoch": 0.09, + "grad_norm": 2.962087853264865, + "learning_rate": 9.891755066376552e-06, + "loss": 0.5937, "step": 1330 }, { - "epoch": 0.14, - "grad_norm": 2.084061296828642, - "learning_rate": 9.686168322191988e-06, - "loss": 0.6, + "epoch": 0.09, + "grad_norm": 1.0186779810421223, + "learning_rate": 9.891517114320239e-06, + "loss": 0.4831, "step": 1331 }, { - "epoch": 0.14, - "grad_norm": 2.3716239247862054, - "learning_rate": 9.685573778795218e-06, - "loss": 0.5694, + "epoch": 0.09, + "grad_norm": 1.7525537347974018, + "learning_rate": 9.89127890387742e-06, + "loss": 0.594, "step": 1332 }, { - "epoch": 0.14, - "grad_norm": 3.2053716555139404, - "learning_rate": 9.684978691042031e-06, - "loss": 0.7231, + "epoch": 0.09, + "grad_norm": 2.075203807381958, + "learning_rate": 9.891040435060672e-06, + "loss": 0.6373, "step": 1333 }, { - "epoch": 0.14, - "grad_norm": 2.844182707802076, - "learning_rate": 9.684383059001562e-06, - "loss": 0.6835, + "epoch": 0.09, + "grad_norm": 1.640400335641878, + "learning_rate": 9.890801707882598e-06, + "loss": 0.5802, "step": 1334 }, { - "epoch": 0.14, - "grad_norm": 3.390207972128154, - "learning_rate": 9.68378688274301e-06, - "loss": 0.7385, + "epoch": 0.09, + "grad_norm": 1.6790695787142118, + "learning_rate": 9.890562722355804e-06, + "loss": 0.5218, "step": 1335 }, { - "epoch": 0.14, - "grad_norm": 2.4980529568022267, - "learning_rate": 9.683190162335638e-06, - "loss": 0.8148, + "epoch": 0.09, + "grad_norm": 1.8131903722706173, + "learning_rate": 9.890323478492915e-06, + "loss": 0.5598, "step": 1336 }, { - "epoch": 0.14, - "grad_norm": 4.308242765317922, - "learning_rate": 9.68259289784877e-06, - "loss": 0.604, + "epoch": 0.09, + "grad_norm": 2.1050705278076056, + "learning_rate": 9.89008397630657e-06, + "loss": 0.6923, "step": 1337 }, { - "epoch": 0.14, - "grad_norm": 2.5111104713428425, - "learning_rate": 9.681995089351797e-06, - "loss": 0.6423, + "epoch": 0.09, + "grad_norm": 1.618339080207912, + "learning_rate": 9.889844215809421e-06, + "loss": 0.5674, "step": 1338 }, { - "epoch": 0.14, - "grad_norm": 2.184046389884775, - "learning_rate": 9.681396736914169e-06, - "loss": 0.8212, + "epoch": 0.1, + "grad_norm": 2.1084757705216957, + "learning_rate": 9.889604197014131e-06, + "loss": 0.5198, "step": 1339 }, { - "epoch": 0.14, - "grad_norm": 2.3349747833725316, - "learning_rate": 9.680797840605398e-06, - "loss": 0.724, + "epoch": 0.1, + "grad_norm": 1.5704978384104147, + "learning_rate": 9.889363919933378e-06, + "loss": 0.5306, "step": 1340 }, { - "epoch": 0.14, - "grad_norm": 3.357395284413713, - "learning_rate": 9.680198400495067e-06, - "loss": 0.7505, + "epoch": 0.1, + "grad_norm": 2.004931824762026, + "learning_rate": 9.889123384579856e-06, + "loss": 0.6032, "step": 1341 }, { - "epoch": 0.14, - "grad_norm": 3.237004929584163, - "learning_rate": 9.679598416652814e-06, - "loss": 0.7294, + "epoch": 0.1, + "grad_norm": 1.5742977725906473, + "learning_rate": 9.888882590966268e-06, + "loss": 0.5218, "step": 1342 }, { - "epoch": 0.14, - "grad_norm": 2.3140384711893978, - "learning_rate": 9.678997889148342e-06, - "loss": 0.7354, + "epoch": 0.1, + "grad_norm": 0.9949687699244333, + "learning_rate": 9.888641539105338e-06, + "loss": 0.4688, "step": 1343 }, { - "epoch": 0.14, - "grad_norm": 2.396103673854701, - "learning_rate": 9.678396818051423e-06, - "loss": 0.7875, + "epoch": 0.1, + "grad_norm": 2.1446940836889308, + "learning_rate": 9.888400229009796e-06, + "loss": 0.5794, "step": 1344 }, { - "epoch": 0.14, - "grad_norm": 2.4741313274912935, - "learning_rate": 9.677795203431886e-06, - "loss": 0.7377, + "epoch": 0.1, + "grad_norm": 1.7524054776251003, + "learning_rate": 9.88815866069239e-06, + "loss": 0.5762, "step": 1345 }, { - "epoch": 0.14, - "grad_norm": 3.4119548772690003, - "learning_rate": 9.677193045359626e-06, - "loss": 0.6744, + "epoch": 0.1, + "grad_norm": 1.8018873812021827, + "learning_rate": 9.887916834165881e-06, + "loss": 0.5488, "step": 1346 }, { - "epoch": 0.14, - "grad_norm": 3.0195566082707535, - "learning_rate": 9.676590343904595e-06, - "loss": 0.5983, + "epoch": 0.1, + "grad_norm": 1.8624604865936865, + "learning_rate": 9.887674749443041e-06, + "loss": 0.6132, "step": 1347 }, { - "epoch": 0.14, - "grad_norm": 2.429934472959647, - "learning_rate": 9.675987099136817e-06, - "loss": 0.6282, + "epoch": 0.1, + "grad_norm": 2.1302568016964893, + "learning_rate": 9.88743240653666e-06, + "loss": 0.5867, "step": 1348 }, { - "epoch": 0.14, - "grad_norm": 2.399876917459758, - "learning_rate": 9.675383311126376e-06, - "loss": 0.7758, + "epoch": 0.1, + "grad_norm": 2.0794488835296883, + "learning_rate": 9.887189805459538e-06, + "loss": 0.6357, "step": 1349 }, { - "epoch": 0.14, - "grad_norm": 2.3597263333081977, - "learning_rate": 9.674778979943417e-06, - "loss": 0.7706, + "epoch": 0.1, + "grad_norm": 1.9293632110340833, + "learning_rate": 9.88694694622449e-06, + "loss": 0.5823, "step": 1350 }, { - "epoch": 0.14, - "grad_norm": 2.582735032637409, - "learning_rate": 9.67417410565815e-06, - "loss": 0.6888, + "epoch": 0.1, + "grad_norm": 1.9122004757017805, + "learning_rate": 9.886703828844346e-06, + "loss": 0.5592, "step": 1351 }, { - "epoch": 0.14, - "grad_norm": 1.2457596734399106, - "learning_rate": 9.673568688340846e-06, - "loss": 0.6083, + "epoch": 0.1, + "grad_norm": 0.9722316927334979, + "learning_rate": 9.886460453331947e-06, + "loss": 0.478, "step": 1352 }, { - "epoch": 0.14, - "grad_norm": 2.2475959881242735, - "learning_rate": 9.672962728061842e-06, - "loss": 0.7384, + "epoch": 0.1, + "grad_norm": 1.656189158121428, + "learning_rate": 9.88621681970015e-06, + "loss": 0.6174, "step": 1353 }, { - "epoch": 0.14, - "grad_norm": 2.7160143206592346, - "learning_rate": 9.672356224891536e-06, - "loss": 0.6937, + "epoch": 0.1, + "grad_norm": 1.6861959515573761, + "learning_rate": 9.885972927961822e-06, + "loss": 0.5726, "step": 1354 }, { - "epoch": 0.14, - "grad_norm": 2.5502591319299843, - "learning_rate": 9.671749178900392e-06, - "loss": 0.7563, + "epoch": 0.1, + "grad_norm": 1.9496077551981468, + "learning_rate": 9.88572877812985e-06, + "loss": 0.6106, "step": 1355 }, { - "epoch": 0.14, - "grad_norm": 2.6327155581660455, - "learning_rate": 9.67114159015893e-06, - "loss": 0.6819, + "epoch": 0.1, + "grad_norm": 1.8746979828578094, + "learning_rate": 9.885484370217127e-06, + "loss": 0.5893, "step": 1356 }, { - "epoch": 0.14, - "grad_norm": 2.131038039139984, - "learning_rate": 9.670533458737744e-06, - "loss": 0.7373, + "epoch": 0.1, + "grad_norm": 0.8776452706712202, + "learning_rate": 9.885239704236567e-06, + "loss": 0.4546, "step": 1357 }, { - "epoch": 0.14, - "grad_norm": 3.385584377854225, - "learning_rate": 9.66992478470748e-06, - "loss": 0.7657, + "epoch": 0.1, + "grad_norm": 0.7957081751554597, + "learning_rate": 9.884994780201092e-06, + "loss": 0.4729, "step": 1358 }, { - "epoch": 0.14, - "grad_norm": 7.320376159534197, - "learning_rate": 9.669315568138854e-06, - "loss": 0.6898, + "epoch": 0.1, + "grad_norm": 1.675260193758147, + "learning_rate": 9.884749598123638e-06, + "loss": 0.5906, "step": 1359 }, { - "epoch": 0.14, - "grad_norm": 2.8074892746810742, - "learning_rate": 9.668705809102644e-06, - "loss": 0.6807, + "epoch": 0.1, + "grad_norm": 0.9327316732091321, + "learning_rate": 9.88450415801716e-06, + "loss": 0.4625, "step": 1360 }, { - "epoch": 0.14, - "grad_norm": 2.8098437749259717, - "learning_rate": 9.668095507669688e-06, - "loss": 0.6959, + "epoch": 0.1, + "grad_norm": 1.5957453136922894, + "learning_rate": 9.884258459894622e-06, + "loss": 0.6584, "step": 1361 }, { - "epoch": 0.14, - "grad_norm": 2.7943446077345855, - "learning_rate": 9.66748466391089e-06, - "loss": 0.7087, + "epoch": 0.1, + "grad_norm": 1.6592526460131953, + "learning_rate": 9.884012503769e-06, + "loss": 0.5582, "step": 1362 }, { - "epoch": 0.14, - "grad_norm": 6.059820137036976, - "learning_rate": 9.666873277897216e-06, - "loss": 0.6634, + "epoch": 0.1, + "grad_norm": 2.6922563835888456, + "learning_rate": 9.883766289653291e-06, + "loss": 0.5549, "step": 1363 }, { - "epoch": 0.14, - "grad_norm": 2.575688586030959, - "learning_rate": 9.666261349699696e-06, - "loss": 0.6982, + "epoch": 0.1, + "grad_norm": 1.7760343892069097, + "learning_rate": 9.883519817560497e-06, + "loss": 0.6305, "step": 1364 }, { - "epoch": 0.14, - "grad_norm": 2.262608946469177, - "learning_rate": 9.66564887938942e-06, - "loss": 0.8071, + "epoch": 0.1, + "grad_norm": 0.905601632868781, + "learning_rate": 9.883273087503638e-06, + "loss": 0.4594, "step": 1365 }, { - "epoch": 0.14, - "grad_norm": 2.311694063851022, - "learning_rate": 9.665035867037545e-06, - "loss": 0.7185, + "epoch": 0.1, + "grad_norm": 2.612005952517804, + "learning_rate": 9.883026099495749e-06, + "loss": 0.4979, "step": 1366 }, { - "epoch": 0.14, - "grad_norm": 2.1721646707712954, - "learning_rate": 9.66442231271529e-06, - "loss": 0.6839, + "epoch": 0.1, + "grad_norm": 1.7579053370491673, + "learning_rate": 9.882778853549874e-06, + "loss": 0.5544, "step": 1367 }, { - "epoch": 0.14, - "grad_norm": 2.056959685399615, - "learning_rate": 9.663808216493931e-06, - "loss": 0.6246, + "epoch": 0.1, + "grad_norm": 1.9690079749625564, + "learning_rate": 9.882531349679076e-06, + "loss": 0.6154, "step": 1368 }, { - "epoch": 0.14, - "grad_norm": 3.211417541737079, - "learning_rate": 9.663193578444815e-06, - "loss": 0.7382, + "epoch": 0.1, + "grad_norm": 2.0707363390114595, + "learning_rate": 9.882283587896426e-06, + "loss": 0.6114, "step": 1369 }, { - "epoch": 0.14, - "grad_norm": 3.482753174472199, - "learning_rate": 9.662578398639353e-06, - "loss": 0.7261, + "epoch": 0.1, + "grad_norm": 1.9054145000893203, + "learning_rate": 9.882035568215016e-06, + "loss": 0.5571, "step": 1370 }, { - "epoch": 0.14, - "grad_norm": 2.8419175901293308, - "learning_rate": 9.661962677149007e-06, - "loss": 0.7479, + "epoch": 0.1, + "grad_norm": 1.7902440879004258, + "learning_rate": 9.881787290647943e-06, + "loss": 0.6072, "step": 1371 }, { - "epoch": 0.14, - "grad_norm": 1.562383209374972, - "learning_rate": 9.661346414045315e-06, - "loss": 0.6338, + "epoch": 0.1, + "grad_norm": 1.7707260498656345, + "learning_rate": 9.881538755208324e-06, + "loss": 0.6074, "step": 1372 }, { - "epoch": 0.14, - "grad_norm": 2.1133024703227266, - "learning_rate": 9.66072960939987e-06, - "loss": 0.8398, + "epoch": 0.1, + "grad_norm": 1.523197096700819, + "learning_rate": 9.881289961909287e-06, + "loss": 0.5912, "step": 1373 }, { - "epoch": 0.14, - "grad_norm": 2.209697515295642, - "learning_rate": 9.660112263284334e-06, - "loss": 0.6979, + "epoch": 0.1, + "grad_norm": 2.1116212262284306, + "learning_rate": 9.881040910763974e-06, + "loss": 0.5876, "step": 1374 }, { - "epoch": 0.14, - "grad_norm": 4.316723331737145, - "learning_rate": 9.659494375770424e-06, - "loss": 0.7773, + "epoch": 0.1, + "grad_norm": 1.5726819488627966, + "learning_rate": 9.88079160178554e-06, + "loss": 0.6164, "step": 1375 }, { - "epoch": 0.14, - "grad_norm": 2.3531621544495867, - "learning_rate": 9.65887594692993e-06, - "loss": 0.7506, + "epoch": 0.1, + "grad_norm": 1.890174774757809, + "learning_rate": 9.880542034987158e-06, + "loss": 0.6076, "step": 1376 }, { - "epoch": 0.14, - "grad_norm": 5.6434780012213634, - "learning_rate": 9.658256976834692e-06, - "loss": 0.7223, + "epoch": 0.1, + "grad_norm": 1.6738298907818343, + "learning_rate": 9.880292210382005e-06, + "loss": 0.5795, "step": 1377 }, { - "epoch": 0.15, - "grad_norm": 2.899949137691732, - "learning_rate": 9.657637465556626e-06, - "loss": 0.7059, + "epoch": 0.1, + "grad_norm": 0.8874690448743272, + "learning_rate": 9.880042127983282e-06, + "loss": 0.4662, "step": 1378 }, { - "epoch": 0.15, - "grad_norm": 3.154298474199794, - "learning_rate": 9.657017413167702e-06, - "loss": 0.6951, + "epoch": 0.1, + "grad_norm": 1.8534001522351713, + "learning_rate": 9.879791787804199e-06, + "loss": 0.6306, "step": 1379 }, { - "epoch": 0.15, - "grad_norm": 2.762468831498145, - "learning_rate": 9.656396819739959e-06, - "loss": 0.7888, + "epoch": 0.1, + "grad_norm": 0.8742899839357069, + "learning_rate": 9.879541189857978e-06, + "loss": 0.47, "step": 1380 }, { - "epoch": 0.15, - "grad_norm": 1.3312043862215792, - "learning_rate": 9.655775685345493e-06, - "loss": 0.6494, + "epoch": 0.1, + "grad_norm": 1.8349905137473284, + "learning_rate": 9.879290334157857e-06, + "loss": 0.6415, "step": 1381 }, { - "epoch": 0.15, - "grad_norm": 3.5501629949975944, - "learning_rate": 9.655154010056464e-06, - "loss": 0.8004, + "epoch": 0.1, + "grad_norm": 1.6677320992876723, + "learning_rate": 9.879039220717089e-06, + "loss": 0.5991, "step": 1382 }, { - "epoch": 0.15, - "grad_norm": 4.64326272620766, - "learning_rate": 9.654531793945102e-06, - "loss": 0.7567, + "epoch": 0.1, + "grad_norm": 1.7094385414930173, + "learning_rate": 9.878787849548934e-06, + "loss": 0.5745, "step": 1383 }, { - "epoch": 0.15, - "grad_norm": 2.575524144576267, - "learning_rate": 9.653909037083689e-06, - "loss": 0.7945, + "epoch": 0.1, + "grad_norm": 2.7082202864508713, + "learning_rate": 9.878536220666674e-06, + "loss": 0.5477, "step": 1384 }, { - "epoch": 0.15, - "grad_norm": 2.9295057714053, - "learning_rate": 9.653285739544578e-06, - "loss": 0.7318, + "epoch": 0.1, + "grad_norm": 1.904464090837267, + "learning_rate": 9.8782843340836e-06, + "loss": 0.6494, "step": 1385 }, { - "epoch": 0.15, - "grad_norm": 2.0997333286555233, - "learning_rate": 9.65266190140018e-06, - "loss": 0.6631, + "epoch": 0.1, + "grad_norm": 2.0391146150216755, + "learning_rate": 9.87803218981302e-06, + "loss": 0.6839, "step": 1386 }, { - "epoch": 0.15, - "grad_norm": 2.2304269290377787, - "learning_rate": 9.652037522722974e-06, - "loss": 0.6277, + "epoch": 0.1, + "grad_norm": 1.6545363508881952, + "learning_rate": 9.87777978786825e-06, + "loss": 0.6067, "step": 1387 }, { - "epoch": 0.15, - "grad_norm": 4.44783772978294, - "learning_rate": 9.651412603585495e-06, - "loss": 0.7528, + "epoch": 0.1, + "grad_norm": 2.0163313088154173, + "learning_rate": 9.87752712826262e-06, + "loss": 0.5801, "step": 1388 }, { - "epoch": 0.15, - "grad_norm": 2.178234675495518, - "learning_rate": 9.650787144060345e-06, - "loss": 0.753, + "epoch": 0.1, + "grad_norm": 2.0057849125615226, + "learning_rate": 9.877274211009482e-06, + "loss": 0.4967, "step": 1389 }, { - "epoch": 0.15, - "grad_norm": 2.2283048639192446, - "learning_rate": 9.65016114422019e-06, - "loss": 0.7198, + "epoch": 0.1, + "grad_norm": 1.659019327226779, + "learning_rate": 9.877021036122194e-06, + "loss": 0.5553, "step": 1390 }, { - "epoch": 0.15, - "grad_norm": 2.4993099361247664, - "learning_rate": 9.649534604137755e-06, - "loss": 0.7052, + "epoch": 0.1, + "grad_norm": 0.9591829540884204, + "learning_rate": 9.87676760361413e-06, + "loss": 0.4478, "step": 1391 }, { - "epoch": 0.15, - "grad_norm": 3.1995602061556725, - "learning_rate": 9.64890752388583e-06, - "loss": 0.7257, + "epoch": 0.1, + "grad_norm": 2.2860450886430064, + "learning_rate": 9.876513913498676e-06, + "loss": 0.5312, "step": 1392 }, { - "epoch": 0.15, - "grad_norm": 2.2307125057623227, - "learning_rate": 9.648279903537268e-06, - "loss": 0.7356, + "epoch": 0.1, + "grad_norm": 1.6637381565510574, + "learning_rate": 9.876259965789232e-06, + "loss": 0.5951, "step": 1393 }, { - "epoch": 0.15, - "grad_norm": 2.1622062738792347, - "learning_rate": 9.647651743164983e-06, - "loss": 0.6852, + "epoch": 0.1, + "grad_norm": 1.8086496448379972, + "learning_rate": 9.876005760499215e-06, + "loss": 0.5952, "step": 1394 }, { - "epoch": 0.15, - "grad_norm": 3.3927400290041, - "learning_rate": 9.647023042841953e-06, - "loss": 0.6911, + "epoch": 0.1, + "grad_norm": 1.6376146336386441, + "learning_rate": 9.87575129764205e-06, + "loss": 0.564, "step": 1395 }, { - "epoch": 0.15, - "grad_norm": 4.504113313434371, - "learning_rate": 9.64639380264122e-06, - "loss": 0.7242, + "epoch": 0.1, + "grad_norm": 1.6583737900036206, + "learning_rate": 9.875496577231181e-06, + "loss": 0.6026, "step": 1396 }, { - "epoch": 0.15, - "grad_norm": 2.837839765380396, - "learning_rate": 9.645764022635886e-06, - "loss": 0.6788, + "epoch": 0.1, + "grad_norm": 1.6521123902596038, + "learning_rate": 9.875241599280063e-06, + "loss": 0.5704, "step": 1397 }, { - "epoch": 0.15, - "grad_norm": 2.2209452355398236, - "learning_rate": 9.645133702899116e-06, - "loss": 0.6795, + "epoch": 0.1, + "grad_norm": 2.0089755274155285, + "learning_rate": 9.874986363802163e-06, + "loss": 0.5989, "step": 1398 }, { - "epoch": 0.15, - "grad_norm": 3.3171450711888, - "learning_rate": 9.644502843504141e-06, - "loss": 0.7098, + "epoch": 0.1, + "grad_norm": 1.901134438407413, + "learning_rate": 9.874730870810964e-06, + "loss": 0.5195, "step": 1399 }, { - "epoch": 0.15, - "grad_norm": 2.433039777766387, - "learning_rate": 9.643871444524253e-06, - "loss": 0.7227, + "epoch": 0.1, + "grad_norm": 1.9062941128993003, + "learning_rate": 9.874475120319965e-06, + "loss": 0.6028, "step": 1400 }, { - "epoch": 0.15, - "grad_norm": 2.572768002891731, - "learning_rate": 9.643239506032805e-06, - "loss": 0.7593, + "epoch": 0.1, + "grad_norm": 1.6511868543735315, + "learning_rate": 9.87421911234267e-06, + "loss": 0.6108, "step": 1401 }, { - "epoch": 0.15, - "grad_norm": 2.125858662151184, - "learning_rate": 9.642607028103212e-06, - "loss": 0.7119, + "epoch": 0.1, + "grad_norm": 1.6704424806548632, + "learning_rate": 9.87396284689261e-06, + "loss": 0.508, "step": 1402 }, { - "epoch": 0.15, - "grad_norm": 2.685061632087422, - "learning_rate": 9.641974010808954e-06, - "loss": 0.6543, + "epoch": 0.1, + "grad_norm": 1.5990269055654875, + "learning_rate": 9.873706323983314e-06, + "loss": 0.6269, "step": 1403 }, { - "epoch": 0.15, - "grad_norm": 2.22659599285853, - "learning_rate": 9.641340454223576e-06, - "loss": 0.7721, + "epoch": 0.1, + "grad_norm": 1.6517162408667507, + "learning_rate": 9.873449543628336e-06, + "loss": 0.5791, "step": 1404 }, { - "epoch": 0.15, - "grad_norm": 3.4662300157631596, - "learning_rate": 9.64070635842068e-06, - "loss": 0.7117, + "epoch": 0.1, + "grad_norm": 1.82014702658685, + "learning_rate": 9.87319250584124e-06, + "loss": 0.6256, "step": 1405 }, { - "epoch": 0.15, - "grad_norm": 2.643822549271141, - "learning_rate": 9.640071723473934e-06, - "loss": 0.7299, + "epoch": 0.1, + "grad_norm": 1.1553734039171788, + "learning_rate": 9.872935210635602e-06, + "loss": 0.4683, "step": 1406 }, { - "epoch": 0.15, - "grad_norm": 2.595323686997289, - "learning_rate": 9.639436549457069e-06, - "loss": 0.7492, + "epoch": 0.1, + "grad_norm": 2.9897352590189112, + "learning_rate": 9.872677658025014e-06, + "loss": 0.6422, "step": 1407 }, { - "epoch": 0.15, - "grad_norm": 2.4646272228107784, - "learning_rate": 9.638800836443875e-06, - "loss": 0.7249, + "epoch": 0.1, + "grad_norm": 1.631784403667013, + "learning_rate": 9.872419848023084e-06, + "loss": 0.5606, "step": 1408 }, { - "epoch": 0.15, - "grad_norm": 2.889739658027975, - "learning_rate": 9.638164584508211e-06, - "loss": 0.7073, + "epoch": 0.1, + "grad_norm": 2.0214091419497486, + "learning_rate": 9.872161780643425e-06, + "loss": 0.6265, "step": 1409 }, { - "epoch": 0.15, - "grad_norm": 2.731955437980805, - "learning_rate": 9.637527793723993e-06, - "loss": 0.6637, + "epoch": 0.1, + "grad_norm": 1.6810760993906442, + "learning_rate": 9.871903455899674e-06, + "loss": 0.5728, "step": 1410 }, { - "epoch": 0.15, - "grad_norm": 7.330236462820327, - "learning_rate": 9.6368904641652e-06, - "loss": 0.6849, + "epoch": 0.1, + "grad_norm": 1.8721409896794687, + "learning_rate": 9.871644873805473e-06, + "loss": 0.5998, "step": 1411 }, { - "epoch": 0.15, - "grad_norm": 2.09841411709079, - "learning_rate": 9.63625259590588e-06, - "loss": 0.7158, + "epoch": 0.1, + "grad_norm": 1.6225167295846379, + "learning_rate": 9.871386034374481e-06, + "loss": 0.57, "step": 1412 }, { - "epoch": 0.15, - "grad_norm": 2.629199349370734, - "learning_rate": 9.635614189020133e-06, - "loss": 0.7321, + "epoch": 0.1, + "grad_norm": 1.5770990401282194, + "learning_rate": 9.871126937620374e-06, + "loss": 0.6318, "step": 1413 }, { - "epoch": 0.15, - "grad_norm": 2.344946491188394, - "learning_rate": 9.63497524358213e-06, - "loss": 0.746, + "epoch": 0.1, + "grad_norm": 1.6992957592098146, + "learning_rate": 9.870867583556836e-06, + "loss": 0.5733, "step": 1414 }, { - "epoch": 0.15, - "grad_norm": 2.6910188223643856, - "learning_rate": 9.6343357596661e-06, - "loss": 0.6378, + "epoch": 0.1, + "grad_norm": 1.7019954839069762, + "learning_rate": 9.870607972197568e-06, + "loss": 0.6263, "step": 1415 }, { - "epoch": 0.15, - "grad_norm": 3.2643901325386224, - "learning_rate": 9.633695737346341e-06, - "loss": 0.7012, + "epoch": 0.1, + "grad_norm": 1.6622363015669066, + "learning_rate": 9.870348103556282e-06, + "loss": 0.6661, "step": 1416 }, { - "epoch": 0.15, - "grad_norm": 2.741008316052005, - "learning_rate": 9.633055176697205e-06, - "loss": 0.7338, + "epoch": 0.1, + "grad_norm": 0.9351899287855089, + "learning_rate": 9.870087977646707e-06, + "loss": 0.4757, "step": 1417 }, { - "epoch": 0.15, - "grad_norm": 3.204256419036011, - "learning_rate": 9.632414077793111e-06, - "loss": 0.7241, + "epoch": 0.1, + "grad_norm": 2.037294074386783, + "learning_rate": 9.869827594482582e-06, + "loss": 0.6337, "step": 1418 }, { - "epoch": 0.15, - "grad_norm": 2.440995779151848, - "learning_rate": 9.63177244070854e-06, - "loss": 0.6625, + "epoch": 0.1, + "grad_norm": 2.373470436973459, + "learning_rate": 9.869566954077662e-06, + "loss": 0.586, "step": 1419 }, { - "epoch": 0.15, - "grad_norm": 2.5374105809523626, - "learning_rate": 9.631130265518036e-06, - "loss": 0.7329, + "epoch": 0.1, + "grad_norm": 1.9651152257719244, + "learning_rate": 9.869306056445717e-06, + "loss": 0.5063, "step": 1420 }, { - "epoch": 0.15, - "grad_norm": 2.46110371173565, - "learning_rate": 9.630487552296207e-06, - "loss": 0.7371, + "epoch": 0.1, + "grad_norm": 1.9959977057958365, + "learning_rate": 9.869044901600526e-06, + "loss": 0.6006, "step": 1421 }, { - "epoch": 0.15, - "grad_norm": 3.3468353330936393, - "learning_rate": 9.629844301117717e-06, - "loss": 0.6405, + "epoch": 0.1, + "grad_norm": 1.92125259190946, + "learning_rate": 9.868783489555883e-06, + "loss": 0.5855, "step": 1422 }, { - "epoch": 0.15, - "grad_norm": 2.397830492051804, - "learning_rate": 9.6292005120573e-06, - "loss": 0.6774, + "epoch": 0.1, + "grad_norm": 1.716970955198324, + "learning_rate": 9.8685218203256e-06, + "loss": 0.6074, "step": 1423 }, { - "epoch": 0.15, - "grad_norm": 3.7532545714870977, - "learning_rate": 9.62855618518975e-06, - "loss": 0.7351, + "epoch": 0.1, + "grad_norm": 2.3122639599490284, + "learning_rate": 9.868259893923498e-06, + "loss": 0.6465, "step": 1424 }, { - "epoch": 0.15, - "grad_norm": 3.1816654437320833, - "learning_rate": 9.627911320589922e-06, - "loss": 0.7497, + "epoch": 0.1, + "grad_norm": 0.9404257486096801, + "learning_rate": 9.86799771036341e-06, + "loss": 0.4639, "step": 1425 }, { - "epoch": 0.15, - "grad_norm": 2.4023123862610016, - "learning_rate": 9.627265918332734e-06, - "loss": 0.7702, + "epoch": 0.1, + "grad_norm": 9.481099703646525, + "learning_rate": 9.867735269659191e-06, + "loss": 0.5019, "step": 1426 }, { - "epoch": 0.15, - "grad_norm": 2.6310701737673368, - "learning_rate": 9.626619978493168e-06, - "loss": 0.7071, + "epoch": 0.1, + "grad_norm": 2.330164184514846, + "learning_rate": 9.867472571824699e-06, + "loss": 0.6522, "step": 1427 }, { - "epoch": 0.15, - "grad_norm": 2.44463817108983, - "learning_rate": 9.62597350114627e-06, - "loss": 0.6781, + "epoch": 0.1, + "grad_norm": 2.3864976308866823, + "learning_rate": 9.867209616873815e-06, + "loss": 0.608, "step": 1428 }, { - "epoch": 0.15, - "grad_norm": 2.4602674966430977, - "learning_rate": 9.625326486367139e-06, - "loss": 0.7439, + "epoch": 0.1, + "grad_norm": 1.673787400231851, + "learning_rate": 9.866946404820423e-06, + "loss": 0.5679, "step": 1429 }, { - "epoch": 0.15, - "grad_norm": 2.4828542109646614, - "learning_rate": 9.624678934230948e-06, - "loss": 0.7377, + "epoch": 0.1, + "grad_norm": 0.8949518955091295, + "learning_rate": 9.866682935678434e-06, + "loss": 0.4436, "step": 1430 }, { - "epoch": 0.15, - "grad_norm": 2.846123388535948, - "learning_rate": 9.624030844812926e-06, - "loss": 0.6388, + "epoch": 0.1, + "grad_norm": 1.0951109104096572, + "learning_rate": 9.866419209461759e-06, + "loss": 0.4941, "step": 1431 }, { - "epoch": 0.15, - "grad_norm": 2.1757720444416084, - "learning_rate": 9.623382218188371e-06, - "loss": 0.6985, + "epoch": 0.1, + "grad_norm": 1.7284872333542305, + "learning_rate": 9.866155226184334e-06, + "loss": 0.5581, "step": 1432 }, { - "epoch": 0.15, - "grad_norm": 3.259786537034406, - "learning_rate": 9.622733054432631e-06, - "loss": 0.7484, + "epoch": 0.1, + "grad_norm": 1.9827106237175938, + "learning_rate": 9.865890985860098e-06, + "loss": 0.5509, "step": 1433 }, { - "epoch": 0.15, - "grad_norm": 2.212671951693864, - "learning_rate": 9.62208335362113e-06, - "loss": 0.6293, + "epoch": 0.1, + "grad_norm": 2.0066137464984792, + "learning_rate": 9.865626488503013e-06, + "loss": 0.5549, "step": 1434 }, { - "epoch": 0.15, - "grad_norm": 2.818588711614401, - "learning_rate": 9.621433115829344e-06, - "loss": 0.7193, + "epoch": 0.1, + "grad_norm": 1.8035790701361751, + "learning_rate": 9.865361734127052e-06, + "loss": 0.5951, "step": 1435 }, { - "epoch": 0.15, - "grad_norm": 4.029835287597727, - "learning_rate": 9.62078234113282e-06, - "loss": 0.6895, + "epoch": 0.1, + "grad_norm": 2.11728905765854, + "learning_rate": 9.865096722746197e-06, + "loss": 0.6113, "step": 1436 }, { - "epoch": 0.15, - "grad_norm": 2.1696773604444783, - "learning_rate": 9.62013102960716e-06, - "loss": 0.7032, + "epoch": 0.1, + "grad_norm": 1.7121587680313333, + "learning_rate": 9.864831454374449e-06, + "loss": 0.6516, "step": 1437 }, { - "epoch": 0.15, - "grad_norm": 3.5749029914926895, - "learning_rate": 9.619479181328034e-06, - "loss": 0.6764, + "epoch": 0.1, + "grad_norm": 1.6593418535335818, + "learning_rate": 9.864565929025817e-06, + "loss": 0.54, "step": 1438 }, { - "epoch": 0.15, - "grad_norm": 2.8794399028585693, - "learning_rate": 9.618826796371168e-06, - "loss": 0.7734, + "epoch": 0.1, + "grad_norm": 2.934403320829197, + "learning_rate": 9.864300146714331e-06, + "loss": 0.5584, "step": 1439 }, { - "epoch": 0.15, - "grad_norm": 2.516706221446295, - "learning_rate": 9.618173874812357e-06, - "loss": 0.7291, + "epoch": 0.1, + "grad_norm": 2.227463207925463, + "learning_rate": 9.864034107454028e-06, + "loss": 0.5621, "step": 1440 }, { - "epoch": 0.15, - "grad_norm": 3.1628068758073637, - "learning_rate": 9.617520416727456e-06, - "loss": 0.7082, + "epoch": 0.1, + "grad_norm": 2.7975763799948856, + "learning_rate": 9.863767811258961e-06, + "loss": 0.5383, "step": 1441 }, { - "epoch": 0.15, - "grad_norm": 2.8419799887337898, - "learning_rate": 9.61686642219238e-06, - "loss": 0.7383, + "epoch": 0.1, + "grad_norm": 1.5590985199622502, + "learning_rate": 9.8635012581432e-06, + "loss": 0.5753, "step": 1442 }, { - "epoch": 0.15, - "grad_norm": 2.6343772160057055, - "learning_rate": 9.616211891283108e-06, - "loss": 0.6451, + "epoch": 0.1, + "grad_norm": 5.121568600115799, + "learning_rate": 9.863234448120823e-06, + "loss": 0.5054, "step": 1443 }, { - "epoch": 0.15, - "grad_norm": 3.1705848350056636, - "learning_rate": 9.615556824075684e-06, - "loss": 0.7876, + "epoch": 0.1, + "grad_norm": 1.9906501504891194, + "learning_rate": 9.862967381205921e-06, + "loss": 0.5811, "step": 1444 }, { - "epoch": 0.15, - "grad_norm": 3.483339076402114, - "learning_rate": 9.61490122064621e-06, - "loss": 0.7049, + "epoch": 0.1, + "grad_norm": 1.995421982165682, + "learning_rate": 9.862700057412604e-06, + "loss": 0.6755, "step": 1445 }, { - "epoch": 0.15, - "grad_norm": 2.0799562658988946, - "learning_rate": 9.614245081070851e-06, - "loss": 0.7073, + "epoch": 0.1, + "grad_norm": 1.8739437708609863, + "learning_rate": 9.862432476754995e-06, + "loss": 0.55, "step": 1446 }, { - "epoch": 0.15, - "grad_norm": 2.6861424428613367, - "learning_rate": 9.61358840542584e-06, - "loss": 0.6411, + "epoch": 0.1, + "grad_norm": 2.519220949222514, + "learning_rate": 9.862164639247224e-06, + "loss": 0.563, "step": 1447 }, { - "epoch": 0.15, - "grad_norm": 2.7191955155420042, - "learning_rate": 9.612931193787464e-06, - "loss": 0.6745, + "epoch": 0.1, + "grad_norm": 2.067812305906726, + "learning_rate": 9.861896544903443e-06, + "loss": 0.5594, "step": 1448 }, { - "epoch": 0.15, - "grad_norm": 6.472474388518654, - "learning_rate": 9.612273446232075e-06, - "loss": 0.7655, + "epoch": 0.1, + "grad_norm": 1.861383590271203, + "learning_rate": 9.861628193737811e-06, + "loss": 0.5543, "step": 1449 }, { - "epoch": 0.15, - "grad_norm": 2.9357438196561434, - "learning_rate": 9.61161516283609e-06, - "loss": 0.6609, + "epoch": 0.1, + "grad_norm": 1.89907605080924, + "learning_rate": 9.861359585764504e-06, + "loss": 0.6055, "step": 1450 }, { - "epoch": 0.15, - "grad_norm": 2.590924049130375, - "learning_rate": 9.610956343675988e-06, - "loss": 0.7238, + "epoch": 0.1, + "grad_norm": 0.9542722957519214, + "learning_rate": 9.861090720997715e-06, + "loss": 0.449, "step": 1451 }, { - "epoch": 0.15, - "grad_norm": 5.879538354937373, - "learning_rate": 9.610296988828305e-06, - "loss": 0.7834, + "epoch": 0.1, + "grad_norm": 3.7812884214850837, + "learning_rate": 9.860821599451639e-06, + "loss": 0.5602, "step": 1452 }, { - "epoch": 0.15, - "grad_norm": 3.1041931126773212, - "learning_rate": 9.609637098369646e-06, - "loss": 0.6807, + "epoch": 0.1, + "grad_norm": 2.028399645690091, + "learning_rate": 9.860552221140496e-06, + "loss": 0.5897, "step": 1453 }, { - "epoch": 0.15, - "grad_norm": 2.261974070639065, - "learning_rate": 9.608976672376673e-06, - "loss": 0.7105, + "epoch": 0.1, + "grad_norm": 2.214139585015646, + "learning_rate": 9.860282586078515e-06, + "loss": 0.5966, "step": 1454 }, { - "epoch": 0.15, - "grad_norm": 3.1210663189380314, - "learning_rate": 9.608315710926113e-06, - "loss": 0.6593, + "epoch": 0.1, + "grad_norm": 4.258955655091173, + "learning_rate": 9.860012694279939e-06, + "loss": 0.6704, "step": 1455 }, { - "epoch": 0.15, - "grad_norm": 3.050620721243887, - "learning_rate": 9.607654214094757e-06, - "loss": 0.6594, + "epoch": 0.1, + "grad_norm": 1.9263774385109618, + "learning_rate": 9.859742545759025e-06, + "loss": 0.5595, "step": 1456 }, { - "epoch": 0.15, - "grad_norm": 2.7905140014141727, - "learning_rate": 9.606992181959451e-06, - "loss": 0.7085, + "epoch": 0.1, + "grad_norm": 2.136398697505051, + "learning_rate": 9.859472140530041e-06, + "loss": 0.6063, "step": 1457 }, { - "epoch": 0.15, - "grad_norm": 2.7770890627009153, - "learning_rate": 9.606329614597114e-06, - "loss": 0.7159, + "epoch": 0.1, + "grad_norm": 1.8434398682311743, + "learning_rate": 9.859201478607274e-06, + "loss": 0.6289, "step": 1458 }, { - "epoch": 0.15, - "grad_norm": 1.9828864391785557, - "learning_rate": 9.605666512084716e-06, - "loss": 0.6708, + "epoch": 0.1, + "grad_norm": 2.090590812605694, + "learning_rate": 9.85893056000502e-06, + "loss": 0.6114, "step": 1459 }, { - "epoch": 0.15, - "grad_norm": 2.266774311682029, - "learning_rate": 9.605002874499296e-06, - "loss": 0.6211, + "epoch": 0.1, + "grad_norm": 2.1828092135057124, + "learning_rate": 9.858659384737588e-06, + "loss": 0.6413, "step": 1460 }, { - "epoch": 0.15, - "grad_norm": 11.988026431087897, - "learning_rate": 9.604338701917954e-06, - "loss": 0.7906, + "epoch": 0.1, + "grad_norm": 2.3776245917494676, + "learning_rate": 9.858387952819305e-06, + "loss": 0.6548, "step": 1461 }, { - "epoch": 0.15, - "grad_norm": 5.5367891929196, - "learning_rate": 9.603673994417853e-06, - "loss": 0.6816, + "epoch": 0.1, + "grad_norm": 1.9615480021563265, + "learning_rate": 9.858116264264508e-06, + "loss": 0.5579, "step": 1462 }, { - "epoch": 0.15, - "grad_norm": 2.3227720167759194, - "learning_rate": 9.603008752076213e-06, - "loss": 0.7428, + "epoch": 0.1, + "grad_norm": 2.1296370927360777, + "learning_rate": 9.857844319087548e-06, + "loss": 0.5831, "step": 1463 }, { - "epoch": 0.15, - "grad_norm": 3.309441828787185, - "learning_rate": 9.602342974970323e-06, - "loss": 0.675, + "epoch": 0.1, + "grad_norm": 1.8444820092635612, + "learning_rate": 9.85757211730279e-06, + "loss": 0.5541, "step": 1464 }, { - "epoch": 0.15, - "grad_norm": 2.523141442933751, - "learning_rate": 9.60167666317753e-06, - "loss": 0.6574, + "epoch": 0.1, + "grad_norm": 1.0228963123705634, + "learning_rate": 9.857299658924613e-06, + "loss": 0.4921, "step": 1465 }, { - "epoch": 0.15, - "grad_norm": 2.9705719635611074, - "learning_rate": 9.601009816775244e-06, - "loss": 0.6318, + "epoch": 0.1, + "grad_norm": 3.3255706664306106, + "learning_rate": 9.85702694396741e-06, + "loss": 0.644, "step": 1466 }, { - "epoch": 0.15, - "grad_norm": 2.4004597591752566, - "learning_rate": 9.600342435840939e-06, - "loss": 0.7256, + "epoch": 0.1, + "grad_norm": 2.359036497887175, + "learning_rate": 9.856753972445586e-06, + "loss": 0.5947, "step": 1467 }, { - "epoch": 0.15, - "grad_norm": 2.9444494206781227, - "learning_rate": 9.599674520452148e-06, - "loss": 0.7762, + "epoch": 0.1, + "grad_norm": 3.1193535244041626, + "learning_rate": 9.85648074437356e-06, + "loss": 0.5265, "step": 1468 }, { - "epoch": 0.15, - "grad_norm": 3.7455953837505005, - "learning_rate": 9.599006070686467e-06, - "loss": 0.7091, + "epoch": 0.1, + "grad_norm": 1.0093373920591115, + "learning_rate": 9.856207259765765e-06, + "loss": 0.4615, "step": 1469 }, { - "epoch": 0.15, - "grad_norm": 3.6074677438299947, - "learning_rate": 9.598337086621555e-06, - "loss": 0.6716, + "epoch": 0.1, + "grad_norm": 2.5590261701788166, + "learning_rate": 9.855933518636648e-06, + "loss": 0.5964, "step": 1470 }, { - "epoch": 0.15, - "grad_norm": 2.9274341045897745, - "learning_rate": 9.597667568335133e-06, - "loss": 0.6385, + "epoch": 0.1, + "grad_norm": 0.7952515854499836, + "learning_rate": 9.855659521000666e-06, + "loss": 0.456, "step": 1471 }, { - "epoch": 0.15, - "grad_norm": 3.7346926939077543, - "learning_rate": 9.596997515904983e-06, - "loss": 0.7569, + "epoch": 0.1, + "grad_norm": 1.5636008294253774, + "learning_rate": 9.855385266872297e-06, + "loss": 0.5056, "step": 1472 }, { - "epoch": 0.16, - "grad_norm": 3.5131800878247748, - "learning_rate": 9.59632692940895e-06, - "loss": 0.8693, + "epoch": 0.1, + "grad_norm": 2.1828141577885463, + "learning_rate": 9.855110756266027e-06, + "loss": 0.5175, "step": 1473 }, { - "epoch": 0.16, - "grad_norm": 3.097568621283518, - "learning_rate": 9.595655808924942e-06, - "loss": 0.7473, + "epoch": 0.1, + "grad_norm": 2.2446849292271036, + "learning_rate": 9.854835989196353e-06, + "loss": 0.6067, "step": 1474 }, { - "epoch": 0.16, - "grad_norm": 3.067535905696128, - "learning_rate": 9.594984154530926e-06, - "loss": 0.7308, + "epoch": 0.1, + "grad_norm": 2.032164353842534, + "learning_rate": 9.854560965677793e-06, + "loss": 0.5313, "step": 1475 }, { - "epoch": 0.16, - "grad_norm": 2.508065359892658, - "learning_rate": 9.594311966304933e-06, - "loss": 0.7814, + "epoch": 0.1, + "grad_norm": 1.8866945212161064, + "learning_rate": 9.854285685724874e-06, + "loss": 0.5943, "step": 1476 }, { - "epoch": 0.16, - "grad_norm": 2.2827015265359845, - "learning_rate": 9.593639244325057e-06, - "loss": 0.7721, + "epoch": 0.1, + "grad_norm": 2.0487169262760814, + "learning_rate": 9.854010149352136e-06, + "loss": 0.5515, "step": 1477 }, { - "epoch": 0.16, - "grad_norm": 2.738236724185965, - "learning_rate": 9.592965988669454e-06, - "loss": 0.7184, + "epoch": 0.1, + "grad_norm": 1.6827681019273515, + "learning_rate": 9.853734356574133e-06, + "loss": 0.549, "step": 1478 }, { - "epoch": 0.16, - "grad_norm": 2.6198830644856206, - "learning_rate": 9.592292199416338e-06, - "loss": 0.9262, + "epoch": 0.1, + "grad_norm": 2.9756537884457033, + "learning_rate": 9.853458307405434e-06, + "loss": 0.5952, "step": 1479 }, { - "epoch": 0.16, - "grad_norm": 2.552236548583453, - "learning_rate": 9.591617876643989e-06, - "loss": 0.7082, + "epoch": 0.11, + "grad_norm": 1.700323285138724, + "learning_rate": 9.853182001860625e-06, + "loss": 0.5739, "step": 1480 }, { - "epoch": 0.16, - "grad_norm": 2.826249720921075, - "learning_rate": 9.590943020430747e-06, - "loss": 0.6825, + "epoch": 0.11, + "grad_norm": 2.096009277136331, + "learning_rate": 9.852905439954294e-06, + "loss": 0.5791, "step": 1481 }, { - "epoch": 0.16, - "grad_norm": 2.860146961164607, - "learning_rate": 9.59026763085502e-06, - "loss": 0.6227, + "epoch": 0.11, + "grad_norm": 1.8899140128821095, + "learning_rate": 9.852628621701058e-06, + "loss": 0.5723, "step": 1482 }, { - "epoch": 0.16, - "grad_norm": 1.2203871767796366, - "learning_rate": 9.589591707995265e-06, - "loss": 0.6181, + "epoch": 0.11, + "grad_norm": 2.045682355152485, + "learning_rate": 9.852351547115531e-06, + "loss": 0.5662, "step": 1483 }, { - "epoch": 0.16, - "grad_norm": 2.401274298711083, - "learning_rate": 9.588915251930013e-06, - "loss": 0.6813, + "epoch": 0.11, + "grad_norm": 2.506298801233158, + "learning_rate": 9.852074216212355e-06, + "loss": 0.6104, "step": 1484 }, { - "epoch": 0.16, - "grad_norm": 4.331113197722243, - "learning_rate": 9.588238262737853e-06, - "loss": 0.6847, + "epoch": 0.11, + "grad_norm": 1.9678981949825465, + "learning_rate": 9.851796629006179e-06, + "loss": 0.6574, "step": 1485 }, { - "epoch": 0.16, - "grad_norm": 2.7893674560880104, - "learning_rate": 9.587560740497434e-06, - "loss": 0.6684, + "epoch": 0.11, + "grad_norm": 1.6412805618982835, + "learning_rate": 9.851518785511662e-06, + "loss": 0.62, "step": 1486 }, { - "epoch": 0.16, - "grad_norm": 3.812079227043104, - "learning_rate": 9.586882685287471e-06, - "loss": 0.6859, + "epoch": 0.11, + "grad_norm": 1.7657762269071653, + "learning_rate": 9.851240685743486e-06, + "loss": 0.5557, "step": 1487 }, { - "epoch": 0.16, - "grad_norm": 2.690594004910145, - "learning_rate": 9.586204097186738e-06, - "loss": 0.6719, + "epoch": 0.11, + "grad_norm": 2.093973366966402, + "learning_rate": 9.850962329716339e-06, + "loss": 0.6025, "step": 1488 }, { - "epoch": 0.16, - "grad_norm": 2.67029996471702, - "learning_rate": 9.585524976274068e-06, - "loss": 0.6752, + "epoch": 0.11, + "grad_norm": 1.9111357378609122, + "learning_rate": 9.850683717444922e-06, + "loss": 0.6162, "step": 1489 }, { - "epoch": 0.16, - "grad_norm": 2.783949918181014, - "learning_rate": 9.584845322628363e-06, - "loss": 0.6744, + "epoch": 0.11, + "grad_norm": 1.7179396545791499, + "learning_rate": 9.850404848943956e-06, + "loss": 0.5121, "step": 1490 }, { - "epoch": 0.16, - "grad_norm": 2.768161593958554, - "learning_rate": 9.58416513632858e-06, - "loss": 0.6731, + "epoch": 0.11, + "grad_norm": 2.0316296958324043, + "learning_rate": 9.850125724228172e-06, + "loss": 0.5919, "step": 1491 }, { - "epoch": 0.16, - "grad_norm": 2.7128750709002785, - "learning_rate": 9.583484417453744e-06, - "loss": 0.6595, + "epoch": 0.11, + "grad_norm": 3.8979095905076733, + "learning_rate": 9.84984634331231e-06, + "loss": 0.6592, "step": 1492 }, { - "epoch": 0.16, - "grad_norm": 3.1967481849379173, - "learning_rate": 9.582803166082938e-06, - "loss": 0.7063, + "epoch": 0.11, + "grad_norm": 3.2880244189660632, + "learning_rate": 9.849566706211133e-06, + "loss": 0.4916, "step": 1493 }, { - "epoch": 0.16, - "grad_norm": 2.5603570119876258, - "learning_rate": 9.582121382295309e-06, - "loss": 0.7163, + "epoch": 0.11, + "grad_norm": 1.6657361310543528, + "learning_rate": 9.84928681293941e-06, + "loss": 0.6745, "step": 1494 }, { - "epoch": 0.16, - "grad_norm": 2.275503794973825, - "learning_rate": 9.58143906617006e-06, - "loss": 0.7148, + "epoch": 0.11, + "grad_norm": 1.7937754617560573, + "learning_rate": 9.849006663511924e-06, + "loss": 0.6375, "step": 1495 }, { - "epoch": 0.16, - "grad_norm": 2.4819881710650984, - "learning_rate": 9.580756217786466e-06, - "loss": 0.6752, + "epoch": 0.11, + "grad_norm": 2.6951311812239105, + "learning_rate": 9.848726257943476e-06, + "loss": 0.5786, "step": 1496 }, { - "epoch": 0.16, - "grad_norm": 1.274480650090562, - "learning_rate": 9.580072837223857e-06, - "loss": 0.6004, + "epoch": 0.11, + "grad_norm": 2.289863702325345, + "learning_rate": 9.848445596248877e-06, + "loss": 0.6399, "step": 1497 }, { - "epoch": 0.16, - "grad_norm": 2.4619991807752157, - "learning_rate": 9.579388924561625e-06, - "loss": 0.7528, + "epoch": 0.11, + "grad_norm": 0.9292179696752689, + "learning_rate": 9.848164678442953e-06, + "loss": 0.4865, "step": 1498 }, { - "epoch": 0.16, - "grad_norm": 2.69869982453989, - "learning_rate": 9.578704479879225e-06, - "loss": 0.6724, + "epoch": 0.11, + "grad_norm": 1.8243361869346137, + "learning_rate": 9.847883504540545e-06, + "loss": 0.6432, "step": 1499 }, { - "epoch": 0.16, - "grad_norm": 2.587551021620836, - "learning_rate": 9.578019503256175e-06, - "loss": 0.661, + "epoch": 0.11, + "grad_norm": 2.683884634997996, + "learning_rate": 9.847602074556502e-06, + "loss": 0.5837, "step": 1500 }, { - "epoch": 0.16, - "grad_norm": 2.5984643485005297, - "learning_rate": 9.577333994772052e-06, - "loss": 0.7543, + "epoch": 0.11, + "grad_norm": 2.1554401485748014, + "learning_rate": 9.84732038850569e-06, + "loss": 0.6043, "step": 1501 }, { - "epoch": 0.16, - "grad_norm": 3.106286735099979, - "learning_rate": 9.576647954506498e-06, - "loss": 0.7503, + "epoch": 0.11, + "grad_norm": 1.6290773936268348, + "learning_rate": 9.847038446402992e-06, + "loss": 0.5487, "step": 1502 }, { - "epoch": 0.16, - "grad_norm": 2.5619268778049284, - "learning_rate": 9.575961382539211e-06, - "loss": 0.7057, + "epoch": 0.11, + "grad_norm": 2.3331277257409377, + "learning_rate": 9.846756248263299e-06, + "loss": 0.6153, "step": 1503 }, { - "epoch": 0.16, - "grad_norm": 2.6307425334372945, - "learning_rate": 9.575274278949962e-06, - "loss": 0.7321, + "epoch": 0.11, + "grad_norm": 1.9749741572803685, + "learning_rate": 9.846473794101517e-06, + "loss": 0.634, "step": 1504 }, { - "epoch": 0.16, - "grad_norm": 2.8345146127858487, - "learning_rate": 9.574586643818572e-06, - "loss": 0.7287, + "epoch": 0.11, + "grad_norm": 1.723253840874343, + "learning_rate": 9.846191083932569e-06, + "loss": 0.609, "step": 1505 }, { - "epoch": 0.16, - "grad_norm": 3.26642048267166, - "learning_rate": 9.57389847722493e-06, - "loss": 0.7251, + "epoch": 0.11, + "grad_norm": 1.844205542323179, + "learning_rate": 9.845908117771383e-06, + "loss": 0.582, "step": 1506 }, { - "epoch": 0.16, - "grad_norm": 2.5229552614331263, - "learning_rate": 9.573209779248985e-06, - "loss": 0.7519, + "epoch": 0.11, + "grad_norm": 1.8029672975852855, + "learning_rate": 9.845624895632914e-06, + "loss": 0.6385, "step": 1507 }, { - "epoch": 0.16, - "grad_norm": 2.80455601855804, - "learning_rate": 9.572520549970746e-06, - "loss": 0.7422, + "epoch": 0.11, + "grad_norm": 1.988096938597669, + "learning_rate": 9.845341417532117e-06, + "loss": 0.6499, "step": 1508 }, { - "epoch": 0.16, - "grad_norm": 2.5636231475624394, - "learning_rate": 9.571830789470288e-06, - "loss": 0.6824, + "epoch": 0.11, + "grad_norm": 2.0366305174923385, + "learning_rate": 9.845057683483969e-06, + "loss": 0.5613, "step": 1509 }, { - "epoch": 0.16, - "grad_norm": 2.455344120878093, - "learning_rate": 9.571140497827746e-06, - "loss": 0.7862, + "epoch": 0.11, + "grad_norm": 0.821054847799922, + "learning_rate": 9.844773693503456e-06, + "loss": 0.4616, "step": 1510 }, { - "epoch": 0.16, - "grad_norm": 2.9947982231220225, - "learning_rate": 9.570449675123313e-06, - "loss": 0.8056, + "epoch": 0.11, + "grad_norm": 0.896355283867609, + "learning_rate": 9.84448944760558e-06, + "loss": 0.4723, "step": 1511 }, { - "epoch": 0.16, - "grad_norm": 3.199158359526649, - "learning_rate": 9.56975832143725e-06, - "loss": 0.7388, + "epoch": 0.11, + "grad_norm": 1.8123919998872928, + "learning_rate": 9.844204945805355e-06, + "loss": 0.5644, "step": 1512 }, { - "epoch": 0.16, - "grad_norm": 2.481563398604672, - "learning_rate": 9.569066436849875e-06, - "loss": 0.7722, + "epoch": 0.11, + "grad_norm": 1.7316559427634801, + "learning_rate": 9.843920188117812e-06, + "loss": 0.5801, "step": 1513 }, { - "epoch": 0.16, - "grad_norm": 2.7913702575178583, - "learning_rate": 9.568374021441567e-06, - "loss": 0.6772, + "epoch": 0.11, + "grad_norm": 1.9866674963325475, + "learning_rate": 9.843635174557992e-06, + "loss": 0.5822, "step": 1514 }, { - "epoch": 0.16, - "grad_norm": 2.3892593790051593, - "learning_rate": 9.567681075292774e-06, - "loss": 0.7162, + "epoch": 0.11, + "grad_norm": 1.9964800827778924, + "learning_rate": 9.84334990514095e-06, + "loss": 0.614, "step": 1515 }, { - "epoch": 0.16, - "grad_norm": 2.3903154524770183, - "learning_rate": 9.566987598483995e-06, - "loss": 0.6662, + "epoch": 0.11, + "grad_norm": 2.2043228155621692, + "learning_rate": 9.843064379881751e-06, + "loss": 0.619, "step": 1516 }, { - "epoch": 0.16, - "grad_norm": 2.6872816236265624, - "learning_rate": 9.5662935910958e-06, - "loss": 0.6746, + "epoch": 0.11, + "grad_norm": 1.6488031515615715, + "learning_rate": 9.842778598795483e-06, + "loss": 0.6427, "step": 1517 }, { - "epoch": 0.16, - "grad_norm": 2.1845449131190704, - "learning_rate": 9.565599053208815e-06, - "loss": 0.713, + "epoch": 0.11, + "grad_norm": 1.9720703629603002, + "learning_rate": 9.842492561897241e-06, + "loss": 0.5264, "step": 1518 }, { - "epoch": 0.16, - "grad_norm": 2.520639847963599, - "learning_rate": 9.564903984903731e-06, - "loss": 0.6745, + "epoch": 0.11, + "grad_norm": 2.1347501938543463, + "learning_rate": 9.842206269202131e-06, + "loss": 0.5913, "step": 1519 }, { - "epoch": 0.16, - "grad_norm": 2.3349303925717697, - "learning_rate": 9.564208386261296e-06, - "loss": 0.6643, + "epoch": 0.11, + "grad_norm": 1.0228150264712537, + "learning_rate": 9.84191972072528e-06, + "loss": 0.4788, "step": 1520 }, { - "epoch": 0.16, - "grad_norm": 2.883067515708215, - "learning_rate": 9.563512257362325e-06, - "loss": 0.7143, + "epoch": 0.11, + "grad_norm": 1.796899264805008, + "learning_rate": 9.841632916481822e-06, + "loss": 0.581, "step": 1521 }, { - "epoch": 0.16, - "grad_norm": 2.403562641572306, - "learning_rate": 9.56281559828769e-06, - "loss": 0.5652, + "epoch": 0.11, + "grad_norm": 3.4717192049253547, + "learning_rate": 9.841345856486907e-06, + "loss": 0.5323, "step": 1522 }, { - "epoch": 0.16, - "grad_norm": 6.692764536743657, - "learning_rate": 9.562118409118334e-06, - "loss": 0.7098, + "epoch": 0.11, + "grad_norm": 1.9951087058069443, + "learning_rate": 9.8410585407557e-06, + "loss": 0.5739, "step": 1523 }, { - "epoch": 0.16, - "grad_norm": 2.24853739667993, - "learning_rate": 9.561420689935243e-06, - "loss": 0.7346, + "epoch": 0.11, + "grad_norm": 2.2886954156592316, + "learning_rate": 9.840770969303377e-06, + "loss": 0.6224, "step": 1524 }, { - "epoch": 0.16, - "grad_norm": 4.847945930120676, - "learning_rate": 9.560722440819485e-06, - "loss": 0.6834, + "epoch": 0.11, + "grad_norm": 1.9509597226831588, + "learning_rate": 9.840483142145129e-06, + "loss": 0.51, "step": 1525 }, { - "epoch": 0.16, - "grad_norm": 2.9793899903106524, - "learning_rate": 9.560023661852178e-06, - "loss": 0.6984, + "epoch": 0.11, + "grad_norm": 1.6522838256087842, + "learning_rate": 9.84019505929616e-06, + "loss": 0.5836, "step": 1526 }, { - "epoch": 0.16, - "grad_norm": 2.696356145427722, - "learning_rate": 9.559324353114503e-06, - "loss": 0.7272, + "epoch": 0.11, + "grad_norm": 2.2459818437562427, + "learning_rate": 9.839906720771685e-06, + "loss": 0.5695, "step": 1527 }, { - "epoch": 0.16, - "grad_norm": 2.2812497097713216, - "learning_rate": 9.558624514687703e-06, - "loss": 0.761, + "epoch": 0.11, + "grad_norm": 2.08916565525036, + "learning_rate": 9.839618126586938e-06, + "loss": 0.5882, "step": 1528 }, { - "epoch": 0.16, - "grad_norm": 2.271576386159374, - "learning_rate": 9.557924146653087e-06, - "loss": 0.7657, + "epoch": 0.11, + "grad_norm": 2.240821146309717, + "learning_rate": 9.839329276757163e-06, + "loss": 0.6425, "step": 1529 }, { - "epoch": 0.16, - "grad_norm": 2.7171270620464365, - "learning_rate": 9.557223249092017e-06, - "loss": 0.7263, + "epoch": 0.11, + "grad_norm": 1.8397566958723328, + "learning_rate": 9.839040171297619e-06, + "loss": 0.5905, "step": 1530 }, { - "epoch": 0.16, - "grad_norm": 2.8535792247554834, - "learning_rate": 9.556521822085924e-06, - "loss": 0.7297, + "epoch": 0.11, + "grad_norm": 1.6947902142134277, + "learning_rate": 9.838750810223575e-06, + "loss": 0.5847, "step": 1531 }, { - "epoch": 0.16, - "grad_norm": 2.4140439514306706, - "learning_rate": 9.555819865716298e-06, - "loss": 0.6768, + "epoch": 0.11, + "grad_norm": 1.7145726321095267, + "learning_rate": 9.838461193550317e-06, + "loss": 0.5508, "step": 1532 }, { - "epoch": 0.16, - "grad_norm": 2.9490982350238957, - "learning_rate": 9.555117380064689e-06, - "loss": 0.698, + "epoch": 0.11, + "grad_norm": 1.8443937488419102, + "learning_rate": 9.838171321293145e-06, + "loss": 0.536, "step": 1533 }, { - "epoch": 0.16, - "grad_norm": 2.49780482074063, - "learning_rate": 9.55441436521271e-06, - "loss": 0.6453, + "epoch": 0.11, + "grad_norm": 1.648708302139436, + "learning_rate": 9.83788119346737e-06, + "loss": 0.5262, "step": 1534 }, { - "epoch": 0.16, - "grad_norm": 3.075811387978172, - "learning_rate": 9.553710821242036e-06, - "loss": 0.6666, + "epoch": 0.11, + "grad_norm": 0.8761204977405773, + "learning_rate": 9.837590810088316e-06, + "loss": 0.4545, "step": 1535 }, { - "epoch": 0.16, - "grad_norm": 3.0553855565019585, - "learning_rate": 9.553006748234402e-06, - "loss": 0.7491, + "epoch": 0.11, + "grad_norm": 1.6647307695243208, + "learning_rate": 9.837300171171323e-06, + "loss": 0.5266, "step": 1536 }, { - "epoch": 0.16, - "grad_norm": 4.1401242046590845, - "learning_rate": 9.552302146271606e-06, - "loss": 0.7782, + "epoch": 0.11, + "grad_norm": 1.6120160196330557, + "learning_rate": 9.837009276731746e-06, + "loss": 0.5282, "step": 1537 }, { - "epoch": 0.16, - "grad_norm": 2.42939226201379, - "learning_rate": 9.551597015435505e-06, - "loss": 0.7143, + "epoch": 0.11, + "grad_norm": 1.071056511808313, + "learning_rate": 9.836718126784948e-06, + "loss": 0.4503, "step": 1538 }, { - "epoch": 0.16, - "grad_norm": 2.82555463707085, - "learning_rate": 9.550891355808022e-06, - "loss": 0.7285, + "epoch": 0.11, + "grad_norm": 0.8713131886550167, + "learning_rate": 9.836426721346308e-06, + "loss": 0.4529, "step": 1539 }, { - "epoch": 0.16, - "grad_norm": 2.6022856991586036, - "learning_rate": 9.550185167471134e-06, - "loss": 0.6323, + "epoch": 0.11, + "grad_norm": 1.7950699385501552, + "learning_rate": 9.836135060431223e-06, + "loss": 0.5694, "step": 1540 }, { - "epoch": 0.16, - "grad_norm": 2.716216112330096, - "learning_rate": 9.549478450506888e-06, - "loss": 0.6955, + "epoch": 0.11, + "grad_norm": 1.855999792749136, + "learning_rate": 9.835843144055096e-06, + "loss": 0.5851, "step": 1541 }, { - "epoch": 0.16, - "grad_norm": 2.9288414453383163, - "learning_rate": 9.548771204997387e-06, - "loss": 0.6652, + "epoch": 0.11, + "grad_norm": 1.730557104318448, + "learning_rate": 9.835550972233347e-06, + "loss": 0.5548, "step": 1542 }, { - "epoch": 0.16, - "grad_norm": 2.9260455534051664, - "learning_rate": 9.548063431024797e-06, - "loss": 0.6677, + "epoch": 0.11, + "grad_norm": 1.8633466601167028, + "learning_rate": 9.835258544981412e-06, + "loss": 0.7023, "step": 1543 }, { - "epoch": 0.16, - "grad_norm": 2.6013304107381066, - "learning_rate": 9.547355128671343e-06, - "loss": 0.6632, + "epoch": 0.11, + "grad_norm": 2.0905976904484556, + "learning_rate": 9.834965862314735e-06, + "loss": 0.6438, "step": 1544 }, { - "epoch": 0.16, - "grad_norm": 2.7326280366062523, - "learning_rate": 9.546646298019315e-06, - "loss": 0.7204, + "epoch": 0.11, + "grad_norm": 1.9116822668911706, + "learning_rate": 9.83467292424878e-06, + "loss": 0.6247, "step": 1545 }, { - "epoch": 0.16, - "grad_norm": 2.1529382962472567, - "learning_rate": 9.545936939151066e-06, - "loss": 0.7363, + "epoch": 0.11, + "grad_norm": 1.8892253985840513, + "learning_rate": 9.834379730799018e-06, + "loss": 0.5883, "step": 1546 }, { - "epoch": 0.16, - "grad_norm": 3.48827212379924, - "learning_rate": 9.545227052149002e-06, - "loss": 0.761, + "epoch": 0.11, + "grad_norm": 1.820160483902085, + "learning_rate": 9.834086281980939e-06, + "loss": 0.6035, "step": 1547 }, { - "epoch": 0.16, - "grad_norm": 2.6924419534835637, - "learning_rate": 9.544516637095598e-06, - "loss": 0.7262, + "epoch": 0.11, + "grad_norm": 1.6730970553235953, + "learning_rate": 9.833792577810039e-06, + "loss": 0.5673, "step": 1548 }, { - "epoch": 0.16, - "grad_norm": 2.4210226974371554, - "learning_rate": 9.54380569407339e-06, - "loss": 0.6652, + "epoch": 0.11, + "grad_norm": 2.035293034977337, + "learning_rate": 9.83349861830184e-06, + "loss": 0.5326, "step": 1549 }, { - "epoch": 0.16, - "grad_norm": 2.91586157827423, - "learning_rate": 9.543094223164967e-06, - "loss": 0.6746, + "epoch": 0.11, + "grad_norm": 1.7761475601402066, + "learning_rate": 9.833204403471863e-06, + "loss": 0.5769, "step": 1550 }, { - "epoch": 0.16, - "grad_norm": 2.8878136097168228, - "learning_rate": 9.542382224452995e-06, - "loss": 0.6712, + "epoch": 0.11, + "grad_norm": 1.8766534333938016, + "learning_rate": 9.832909933335654e-06, + "loss": 0.6043, "step": 1551 }, { - "epoch": 0.16, - "grad_norm": 2.485973778059242, - "learning_rate": 9.541669698020184e-06, - "loss": 0.7157, + "epoch": 0.11, + "grad_norm": 3.1027282438101396, + "learning_rate": 9.832615207908764e-06, + "loss": 0.5968, "step": 1552 }, { - "epoch": 0.16, - "grad_norm": 2.6385514951897298, - "learning_rate": 9.540956643949317e-06, - "loss": 0.7383, + "epoch": 0.11, + "grad_norm": 2.6293091433880185, + "learning_rate": 9.832320227206766e-06, + "loss": 0.5999, "step": 1553 }, { - "epoch": 0.16, - "grad_norm": 3.2286222175015378, - "learning_rate": 9.540243062323234e-06, - "loss": 0.6431, + "epoch": 0.11, + "grad_norm": 1.7368266857328927, + "learning_rate": 9.832024991245239e-06, + "loss": 0.6164, "step": 1554 }, { - "epoch": 0.16, - "grad_norm": 2.6255728686862043, - "learning_rate": 9.539528953224835e-06, - "loss": 0.6488, + "epoch": 0.11, + "grad_norm": 1.7576112320855033, + "learning_rate": 9.831729500039777e-06, + "loss": 0.6785, "step": 1555 }, { - "epoch": 0.16, - "grad_norm": 5.294558982143112, - "learning_rate": 9.538814316737085e-06, - "loss": 0.7291, + "epoch": 0.11, + "grad_norm": 1.8307473909175571, + "learning_rate": 9.831433753605991e-06, + "loss": 0.6213, "step": 1556 }, { - "epoch": 0.16, - "grad_norm": 2.7649291179617075, - "learning_rate": 9.53809915294301e-06, - "loss": 0.7938, + "epoch": 0.11, + "grad_norm": 1.6772546349476507, + "learning_rate": 9.831137751959505e-06, + "loss": 0.5818, "step": 1557 }, { - "epoch": 0.16, - "grad_norm": 2.6080959910423047, - "learning_rate": 9.53738346192569e-06, - "loss": 0.6932, + "epoch": 0.11, + "grad_norm": 1.6016211343928812, + "learning_rate": 9.830841495115953e-06, + "loss": 0.5786, "step": 1558 }, { - "epoch": 0.16, - "grad_norm": 3.0121425622421936, - "learning_rate": 9.536667243768279e-06, - "loss": 0.689, + "epoch": 0.11, + "grad_norm": 1.8589299776562003, + "learning_rate": 9.830544983090981e-06, + "loss": 0.6085, "step": 1559 }, { - "epoch": 0.16, - "grad_norm": 3.332705210852358, - "learning_rate": 9.53595049855398e-06, - "loss": 0.7032, + "epoch": 0.11, + "grad_norm": 1.8260544733557214, + "learning_rate": 9.830248215900258e-06, + "loss": 0.5315, "step": 1560 }, { - "epoch": 0.16, - "grad_norm": 2.5824358159901704, - "learning_rate": 9.535233226366067e-06, - "loss": 0.6877, + "epoch": 0.11, + "grad_norm": 1.670022879962204, + "learning_rate": 9.829951193559454e-06, + "loss": 0.5996, "step": 1561 }, { - "epoch": 0.16, - "grad_norm": 2.7223244944225486, - "learning_rate": 9.534515427287865e-06, - "loss": 0.6517, + "epoch": 0.11, + "grad_norm": 0.9624733801027308, + "learning_rate": 9.829653916084266e-06, + "loss": 0.4457, "step": 1562 }, { - "epoch": 0.16, - "grad_norm": 2.0686395234828074, - "learning_rate": 9.53379710140277e-06, - "loss": 0.7079, + "epoch": 0.11, + "grad_norm": 1.9469722046258957, + "learning_rate": 9.82935638349039e-06, + "loss": 0.6246, "step": 1563 }, { - "epoch": 0.16, - "grad_norm": 3.0167637777660223, - "learning_rate": 9.533078248794232e-06, - "loss": 0.6687, + "epoch": 0.11, + "grad_norm": 1.7551382150165016, + "learning_rate": 9.829058595793547e-06, + "loss": 0.5039, "step": 1564 }, { - "epoch": 0.16, - "grad_norm": 2.432973784171849, - "learning_rate": 9.532358869545767e-06, - "loss": 0.7802, + "epoch": 0.11, + "grad_norm": 1.677544766974154, + "learning_rate": 9.828760553009465e-06, + "loss": 0.595, "step": 1565 }, { - "epoch": 0.16, - "grad_norm": 2.2926310921200885, - "learning_rate": 9.531638963740953e-06, - "loss": 0.7231, + "epoch": 0.11, + "grad_norm": 1.6836328474471074, + "learning_rate": 9.828462255153888e-06, + "loss": 0.5477, "step": 1566 }, { - "epoch": 0.16, - "grad_norm": 2.6393048434940276, - "learning_rate": 9.530918531463423e-06, - "loss": 0.6623, + "epoch": 0.11, + "grad_norm": 1.7724968537330028, + "learning_rate": 9.828163702242574e-06, + "loss": 0.5929, "step": 1567 }, { - "epoch": 0.17, - "grad_norm": 2.481069201927267, - "learning_rate": 9.530197572796873e-06, - "loss": 0.735, + "epoch": 0.11, + "grad_norm": 1.6949652615383937, + "learning_rate": 9.827864894291295e-06, + "loss": 0.5635, "step": 1568 }, { - "epoch": 0.17, - "grad_norm": 2.6013622318011276, - "learning_rate": 9.529476087825067e-06, - "loss": 0.6855, + "epoch": 0.11, + "grad_norm": 1.640802326747633, + "learning_rate": 9.827565831315832e-06, + "loss": 0.6115, "step": 1569 }, { - "epoch": 0.17, - "grad_norm": 3.4249010091697434, - "learning_rate": 9.528754076631821e-06, - "loss": 0.7398, + "epoch": 0.11, + "grad_norm": 0.924672089187853, + "learning_rate": 9.827266513331983e-06, + "loss": 0.4711, "step": 1570 }, { - "epoch": 0.17, - "grad_norm": 2.2070633925039465, - "learning_rate": 9.528031539301016e-06, - "loss": 0.7574, + "epoch": 0.11, + "grad_norm": 1.7689106298480262, + "learning_rate": 9.82696694035556e-06, + "loss": 0.6107, "step": 1571 }, { - "epoch": 0.17, - "grad_norm": 2.555841750294239, - "learning_rate": 9.5273084759166e-06, - "loss": 0.6432, + "epoch": 0.11, + "grad_norm": 1.7981877657524108, + "learning_rate": 9.826667112402389e-06, + "loss": 0.5117, "step": 1572 }, { - "epoch": 0.17, - "grad_norm": 1.457477490220107, - "learning_rate": 9.526584886562571e-06, - "loss": 0.6151, + "epoch": 0.11, + "grad_norm": 1.7220863478534356, + "learning_rate": 9.826367029488305e-06, + "loss": 0.526, "step": 1573 }, { - "epoch": 0.17, - "grad_norm": 3.048705570425487, - "learning_rate": 9.525860771322995e-06, - "loss": 0.7126, + "epoch": 0.11, + "grad_norm": 2.5018013839811397, + "learning_rate": 9.82606669162916e-06, + "loss": 0.5547, "step": 1574 }, { - "epoch": 0.17, - "grad_norm": 3.438910164173861, - "learning_rate": 9.525136130281995e-06, - "loss": 0.79, + "epoch": 0.11, + "grad_norm": 1.7725953143831277, + "learning_rate": 9.82576609884082e-06, + "loss": 0.5089, "step": 1575 }, { - "epoch": 0.17, - "grad_norm": 4.28221697375792, - "learning_rate": 9.524410963523763e-06, - "loss": 0.7654, + "epoch": 0.11, + "grad_norm": 1.895532656094033, + "learning_rate": 9.825465251139162e-06, + "loss": 0.5905, "step": 1576 }, { - "epoch": 0.17, - "grad_norm": 3.0467966030740645, - "learning_rate": 9.523685271132543e-06, - "loss": 0.7272, + "epoch": 0.11, + "grad_norm": 1.4908339176305159, + "learning_rate": 9.825164148540079e-06, + "loss": 0.5368, "step": 1577 }, { - "epoch": 0.17, - "grad_norm": 2.3181479200807398, - "learning_rate": 9.522959053192646e-06, - "loss": 0.7779, + "epoch": 0.11, + "grad_norm": 2.079911526669793, + "learning_rate": 9.824862791059476e-06, + "loss": 0.6269, "step": 1578 }, { - "epoch": 0.17, - "grad_norm": 2.571771113992117, - "learning_rate": 9.522232309788439e-06, - "loss": 0.7544, + "epoch": 0.11, + "grad_norm": 2.859521367961629, + "learning_rate": 9.824561178713272e-06, + "loss": 0.6996, "step": 1579 }, { - "epoch": 0.17, - "grad_norm": 3.1183269342095508, - "learning_rate": 9.521505041004356e-06, - "loss": 0.7422, + "epoch": 0.11, + "grad_norm": 2.169168318659681, + "learning_rate": 9.824259311517396e-06, + "loss": 0.557, "step": 1580 }, { - "epoch": 0.17, - "grad_norm": 2.416853737352066, - "learning_rate": 9.520777246924887e-06, - "loss": 0.7424, + "epoch": 0.11, + "grad_norm": 1.88249992411124, + "learning_rate": 9.823957189487801e-06, + "loss": 0.5377, "step": 1581 }, { - "epoch": 0.17, - "grad_norm": 2.885932690234875, - "learning_rate": 9.520048927634587e-06, - "loss": 0.6748, + "epoch": 0.11, + "grad_norm": 1.7778815986272392, + "learning_rate": 9.823654812640438e-06, + "loss": 0.5912, "step": 1582 }, { - "epoch": 0.17, - "grad_norm": 2.615546636686311, - "learning_rate": 9.519320083218067e-06, - "loss": 0.7059, + "epoch": 0.11, + "grad_norm": 1.8539350460700688, + "learning_rate": 9.823352180991283e-06, + "loss": 0.4998, "step": 1583 }, { - "epoch": 0.17, - "grad_norm": 2.755154151212538, - "learning_rate": 9.518590713760004e-06, - "loss": 0.7826, + "epoch": 0.11, + "grad_norm": 1.7673844116002844, + "learning_rate": 9.823049294556324e-06, + "loss": 0.6286, "step": 1584 }, { - "epoch": 0.17, - "grad_norm": 2.132180252839951, - "learning_rate": 9.517860819345136e-06, - "loss": 0.6665, + "epoch": 0.11, + "grad_norm": 1.877665104666314, + "learning_rate": 9.822746153351557e-06, + "loss": 0.5398, "step": 1585 }, { - "epoch": 0.17, - "grad_norm": 2.255957083552413, - "learning_rate": 9.517130400058255e-06, - "loss": 0.7323, + "epoch": 0.11, + "grad_norm": 1.641569988839206, + "learning_rate": 9.822442757392996e-06, + "loss": 0.5956, "step": 1586 }, { - "epoch": 0.17, - "grad_norm": 2.2483764789100205, - "learning_rate": 9.516399455984222e-06, - "loss": 0.7601, + "epoch": 0.11, + "grad_norm": 1.785509914706729, + "learning_rate": 9.822139106696668e-06, + "loss": 0.5902, "step": 1587 }, { - "epoch": 0.17, - "grad_norm": 3.352631648887012, - "learning_rate": 9.515667987207958e-06, - "loss": 0.7785, + "epoch": 0.11, + "grad_norm": 1.9349241008709828, + "learning_rate": 9.821835201278615e-06, + "loss": 0.53, "step": 1588 }, { - "epoch": 0.17, - "grad_norm": 2.0869685444798676, - "learning_rate": 9.514935993814438e-06, - "loss": 0.6793, + "epoch": 0.11, + "grad_norm": 1.6217658983953225, + "learning_rate": 9.821531041154885e-06, + "loss": 0.5612, "step": 1589 }, { - "epoch": 0.17, - "grad_norm": 2.3425785545443816, - "learning_rate": 9.514203475888706e-06, - "loss": 0.781, + "epoch": 0.11, + "grad_norm": 3.9560051568541152, + "learning_rate": 9.821226626341552e-06, + "loss": 0.6205, "step": 1590 }, { - "epoch": 0.17, - "grad_norm": 2.86315297570834, - "learning_rate": 9.513470433515866e-06, - "loss": 0.6503, + "epoch": 0.11, + "grad_norm": 1.7990213566814486, + "learning_rate": 9.82092195685469e-06, + "loss": 0.5442, "step": 1591 }, { - "epoch": 0.17, - "grad_norm": 3.1652116829667896, - "learning_rate": 9.512736866781076e-06, - "loss": 0.681, + "epoch": 0.11, + "grad_norm": 1.779142961331543, + "learning_rate": 9.820617032710393e-06, + "loss": 0.6246, "step": 1592 }, { - "epoch": 0.17, - "grad_norm": 3.766273085621574, - "learning_rate": 9.512002775769562e-06, - "loss": 0.7644, + "epoch": 0.11, + "grad_norm": 0.8107379121808159, + "learning_rate": 9.820311853924771e-06, + "loss": 0.4663, "step": 1593 }, { - "epoch": 0.17, - "grad_norm": 2.2909986490602074, - "learning_rate": 9.511268160566609e-06, - "loss": 0.6739, + "epoch": 0.11, + "grad_norm": 1.7208302145817087, + "learning_rate": 9.820006420513944e-06, + "loss": 0.578, "step": 1594 }, { - "epoch": 0.17, - "grad_norm": 2.075660637359557, - "learning_rate": 9.510533021257562e-06, - "loss": 0.6721, + "epoch": 0.11, + "grad_norm": 2.9151133571688486, + "learning_rate": 9.819700732494044e-06, + "loss": 0.5764, "step": 1595 }, { - "epoch": 0.17, - "grad_norm": 2.21371811437727, - "learning_rate": 9.509797357927826e-06, - "loss": 0.6669, + "epoch": 0.11, + "grad_norm": 0.8662837230965708, + "learning_rate": 9.819394789881222e-06, + "loss": 0.4665, "step": 1596 }, { - "epoch": 0.17, - "grad_norm": 2.2650689770495087, - "learning_rate": 9.50906117066287e-06, - "loss": 0.6818, + "epoch": 0.11, + "grad_norm": 1.4904979348105627, + "learning_rate": 9.819088592691634e-06, + "loss": 0.5445, "step": 1597 }, { - "epoch": 0.17, - "grad_norm": 8.894316490655317, - "learning_rate": 9.508324459548221e-06, - "loss": 0.6128, + "epoch": 0.11, + "grad_norm": 1.7093385780023105, + "learning_rate": 9.81878214094146e-06, + "loss": 0.5896, "step": 1598 }, { - "epoch": 0.17, - "grad_norm": 2.363686348557055, - "learning_rate": 9.50758722466947e-06, - "loss": 0.6766, + "epoch": 0.11, + "grad_norm": 1.8134905269016217, + "learning_rate": 9.818475434646884e-06, + "loss": 0.5286, "step": 1599 }, { - "epoch": 0.17, - "grad_norm": 2.1204545096681504, - "learning_rate": 9.506849466112264e-06, - "loss": 0.7487, + "epoch": 0.11, + "grad_norm": 1.68852689279498, + "learning_rate": 9.818168473824108e-06, + "loss": 0.5887, "step": 1600 }, { - "epoch": 0.17, - "grad_norm": 2.445721487013043, - "learning_rate": 9.506111183962316e-06, - "loss": 0.66, + "epoch": 0.11, + "grad_norm": 0.8660182807726676, + "learning_rate": 9.817861258489347e-06, + "loss": 0.4715, "step": 1601 }, { - "epoch": 0.17, - "grad_norm": 2.9704281101668144, - "learning_rate": 9.505372378305398e-06, - "loss": 0.6566, + "epoch": 0.11, + "grad_norm": 1.5622628909502618, + "learning_rate": 9.81755378865883e-06, + "loss": 0.6048, "step": 1602 }, { - "epoch": 0.17, - "grad_norm": 4.032858460518241, - "learning_rate": 9.504633049227338e-06, - "loss": 0.7025, + "epoch": 0.11, + "grad_norm": 1.6420845708675185, + "learning_rate": 9.817246064348797e-06, + "loss": 0.5727, "step": 1603 }, { - "epoch": 0.17, - "grad_norm": 2.3197467695242198, - "learning_rate": 9.503893196814034e-06, - "loss": 0.6593, + "epoch": 0.11, + "grad_norm": 1.8233446934041162, + "learning_rate": 9.816938085575505e-06, + "loss": 0.535, "step": 1604 }, { - "epoch": 0.17, - "grad_norm": 3.003637291105467, - "learning_rate": 9.503152821151435e-06, - "loss": 0.7282, + "epoch": 0.11, + "grad_norm": 1.4395387955754717, + "learning_rate": 9.816629852355219e-06, + "loss": 0.5764, "step": 1605 }, { - "epoch": 0.17, - "grad_norm": 2.974823409792684, - "learning_rate": 9.502411922325561e-06, - "loss": 0.644, + "epoch": 0.11, + "grad_norm": 0.8827211499418127, + "learning_rate": 9.816321364704227e-06, + "loss": 0.4747, "step": 1606 }, { - "epoch": 0.17, - "grad_norm": 2.3999354547850578, - "learning_rate": 9.501670500422483e-06, - "loss": 0.7695, + "epoch": 0.11, + "grad_norm": 1.7553925011685585, + "learning_rate": 9.816012622638817e-06, + "loss": 0.5906, "step": 1607 }, { - "epoch": 0.17, - "grad_norm": 3.142048720747408, - "learning_rate": 9.500928555528341e-06, - "loss": 0.697, + "epoch": 0.11, + "grad_norm": 1.8728316307807868, + "learning_rate": 9.815703626175303e-06, + "loss": 0.5548, "step": 1608 }, { - "epoch": 0.17, - "grad_norm": 2.1918483417994215, - "learning_rate": 9.500186087729331e-06, - "loss": 0.6865, + "epoch": 0.11, + "grad_norm": 1.9727137353619058, + "learning_rate": 9.815394375330005e-06, + "loss": 0.5571, "step": 1609 }, { - "epoch": 0.17, - "grad_norm": 2.3130950683881673, - "learning_rate": 9.49944309711171e-06, - "loss": 0.6882, + "epoch": 0.11, + "grad_norm": 2.1967351453914095, + "learning_rate": 9.81508487011926e-06, + "loss": 0.5078, "step": 1610 }, { - "epoch": 0.17, - "grad_norm": 2.2722152426733166, - "learning_rate": 9.498699583761795e-06, - "loss": 0.7057, + "epoch": 0.11, + "grad_norm": 1.7560020321923198, + "learning_rate": 9.814775110559415e-06, + "loss": 0.6072, "step": 1611 }, { - "epoch": 0.17, - "grad_norm": 2.3067065149092674, - "learning_rate": 9.497955547765966e-06, - "loss": 0.6129, + "epoch": 0.11, + "grad_norm": 1.6380506234046546, + "learning_rate": 9.814465096666837e-06, + "loss": 0.5563, "step": 1612 }, { - "epoch": 0.17, - "grad_norm": 3.000454383678098, - "learning_rate": 9.497210989210665e-06, - "loss": 0.709, + "epoch": 0.11, + "grad_norm": 1.4790109115036612, + "learning_rate": 9.814154828457895e-06, + "loss": 0.5828, "step": 1613 }, { - "epoch": 0.17, - "grad_norm": 2.6049845885606016, - "learning_rate": 9.49646590818239e-06, - "loss": 0.7035, + "epoch": 0.11, + "grad_norm": 1.6659779725324986, + "learning_rate": 9.813844305948984e-06, + "loss": 0.6296, "step": 1614 }, { - "epoch": 0.17, - "grad_norm": 4.5829176336530795, - "learning_rate": 9.495720304767705e-06, - "loss": 0.6982, + "epoch": 0.11, + "grad_norm": 2.075095849674983, + "learning_rate": 9.813533529156507e-06, + "loss": 0.5573, "step": 1615 }, { - "epoch": 0.17, - "grad_norm": 2.8362441150996003, - "learning_rate": 9.494974179053233e-06, - "loss": 0.7086, + "epoch": 0.11, + "grad_norm": 1.4698755951975089, + "learning_rate": 9.813222498096877e-06, + "loss": 0.5986, "step": 1616 }, { - "epoch": 0.17, - "grad_norm": 7.6175372207495045, - "learning_rate": 9.494227531125652e-06, - "loss": 0.7028, + "epoch": 0.11, + "grad_norm": 1.793025537997058, + "learning_rate": 9.812911212786525e-06, + "loss": 0.5242, "step": 1617 }, { - "epoch": 0.17, - "grad_norm": 3.522650359801413, - "learning_rate": 9.493480361071707e-06, - "loss": 0.7281, + "epoch": 0.11, + "grad_norm": 1.6963956019489854, + "learning_rate": 9.812599673241896e-06, + "loss": 0.6089, "step": 1618 }, { - "epoch": 0.17, - "grad_norm": 2.4815378042784713, - "learning_rate": 9.492732668978205e-06, - "loss": 0.6899, + "epoch": 0.11, + "grad_norm": 1.8024032896597002, + "learning_rate": 9.812287879479442e-06, + "loss": 0.6022, "step": 1619 }, { - "epoch": 0.17, - "grad_norm": 3.439653241146134, - "learning_rate": 9.491984454932009e-06, - "loss": 0.6359, + "epoch": 0.11, + "grad_norm": 1.017901370911912, + "learning_rate": 9.81197583151564e-06, + "loss": 0.4632, "step": 1620 }, { - "epoch": 0.17, - "grad_norm": 2.322479399522989, - "learning_rate": 9.491235719020042e-06, - "loss": 0.6365, + "epoch": 0.12, + "grad_norm": 1.8083063949841252, + "learning_rate": 9.811663529366966e-06, + "loss": 0.5672, "step": 1621 }, { - "epoch": 0.17, - "grad_norm": 2.8722680257735367, - "learning_rate": 9.490486461329293e-06, - "loss": 0.7157, + "epoch": 0.12, + "grad_norm": 1.9075548432686693, + "learning_rate": 9.81135097304992e-06, + "loss": 0.6011, "step": 1622 }, { - "epoch": 0.17, - "grad_norm": 2.8325212931006036, - "learning_rate": 9.489736681946809e-06, - "loss": 0.7014, + "epoch": 0.12, + "grad_norm": 2.1158141447699537, + "learning_rate": 9.811038162581015e-06, + "loss": 0.6131, "step": 1623 }, { - "epoch": 0.17, - "grad_norm": 2.8149300073276424, - "learning_rate": 9.488986380959694e-06, - "loss": 0.7507, + "epoch": 0.12, + "grad_norm": 0.9678323502907267, + "learning_rate": 9.81072509797677e-06, + "loss": 0.4665, "step": 1624 }, { - "epoch": 0.17, - "grad_norm": 4.399930194308783, - "learning_rate": 9.488235558455118e-06, - "loss": 0.7731, + "epoch": 0.12, + "grad_norm": 1.5052479744935263, + "learning_rate": 9.810411779253725e-06, + "loss": 0.6009, "step": 1625 }, { - "epoch": 0.17, - "grad_norm": 3.4240902602318832, - "learning_rate": 9.487484214520308e-06, - "loss": 0.6616, + "epoch": 0.12, + "grad_norm": 1.8632299582540817, + "learning_rate": 9.810098206428432e-06, + "loss": 0.5452, "step": 1626 }, { - "epoch": 0.17, - "grad_norm": 3.3880823758905465, - "learning_rate": 9.486732349242556e-06, - "loss": 0.6865, + "epoch": 0.12, + "grad_norm": 1.5085253336266504, + "learning_rate": 9.80978437951745e-06, + "loss": 0.5733, "step": 1627 }, { - "epoch": 0.17, - "grad_norm": 2.434462113134042, - "learning_rate": 9.485979962709209e-06, - "loss": 0.7546, + "epoch": 0.12, + "grad_norm": 1.6984652842263934, + "learning_rate": 9.80947029853736e-06, + "loss": 0.5591, "step": 1628 }, { - "epoch": 0.17, - "grad_norm": 3.140235982386299, - "learning_rate": 9.485227055007676e-06, - "loss": 0.6478, + "epoch": 0.12, + "grad_norm": 1.7951071243314094, + "learning_rate": 9.809155963504753e-06, + "loss": 0.551, "step": 1629 }, { - "epoch": 0.17, - "grad_norm": 2.6979154962012895, - "learning_rate": 9.48447362622543e-06, - "loss": 0.7336, + "epoch": 0.12, + "grad_norm": 3.6827709780243767, + "learning_rate": 9.808841374436233e-06, + "loss": 0.6769, "step": 1630 }, { - "epoch": 0.17, - "grad_norm": 3.564891032405616, - "learning_rate": 9.483719676450003e-06, - "loss": 0.6995, + "epoch": 0.12, + "grad_norm": 1.6784552134007635, + "learning_rate": 9.808526531348415e-06, + "loss": 0.611, "step": 1631 }, { - "epoch": 0.17, - "grad_norm": 5.186995520230965, - "learning_rate": 9.482965205768983e-06, - "loss": 0.7284, + "epoch": 0.12, + "grad_norm": 1.6783044488443546, + "learning_rate": 9.808211434257933e-06, + "loss": 0.5133, "step": 1632 }, { - "epoch": 0.17, - "grad_norm": 2.93274944592198, - "learning_rate": 9.482210214270026e-06, - "loss": 0.7065, + "epoch": 0.12, + "grad_norm": 2.0318226917064615, + "learning_rate": 9.807896083181431e-06, + "loss": 0.6137, "step": 1633 }, { - "epoch": 0.17, - "grad_norm": 2.3964166552732427, - "learning_rate": 9.481454702040842e-06, - "loss": 0.7386, + "epoch": 0.12, + "grad_norm": 1.5583388671532634, + "learning_rate": 9.807580478135565e-06, + "loss": 0.5543, "step": 1634 }, { - "epoch": 0.17, - "grad_norm": 2.290346577796857, - "learning_rate": 9.480698669169207e-06, - "loss": 0.6791, + "epoch": 0.12, + "grad_norm": 1.7716049186121556, + "learning_rate": 9.80726461913701e-06, + "loss": 0.5514, "step": 1635 }, { - "epoch": 0.17, - "grad_norm": 6.335965960819836, - "learning_rate": 9.479942115742951e-06, - "loss": 0.6365, + "epoch": 0.12, + "grad_norm": 3.432260157933293, + "learning_rate": 9.806948506202446e-06, + "loss": 0.5992, "step": 1636 }, { - "epoch": 0.17, - "grad_norm": 1.9605346112705178, - "learning_rate": 9.47918504184997e-06, - "loss": 0.6899, + "epoch": 0.12, + "grad_norm": 1.504919761136113, + "learning_rate": 9.806632139348578e-06, + "loss": 0.5316, "step": 1637 }, { - "epoch": 0.17, - "grad_norm": 2.7418810164216705, - "learning_rate": 9.47842744757822e-06, - "loss": 0.6344, + "epoch": 0.12, + "grad_norm": 1.6976097015709428, + "learning_rate": 9.80631551859211e-06, + "loss": 0.6231, "step": 1638 }, { - "epoch": 0.17, - "grad_norm": 2.2563707246896096, - "learning_rate": 9.477669333015714e-06, - "loss": 0.5757, + "epoch": 0.12, + "grad_norm": 1.6266425970762441, + "learning_rate": 9.80599864394977e-06, + "loss": 0.6289, "step": 1639 }, { - "epoch": 0.17, - "grad_norm": 3.013900658553373, - "learning_rate": 9.476910698250529e-06, - "loss": 0.7077, + "epoch": 0.12, + "grad_norm": 1.7771037127262488, + "learning_rate": 9.805681515438298e-06, + "loss": 0.5222, "step": 1640 }, { - "epoch": 0.17, - "grad_norm": 2.396575665358366, - "learning_rate": 9.4761515433708e-06, - "loss": 0.7228, + "epoch": 0.12, + "grad_norm": 0.9208656386535256, + "learning_rate": 9.805364133074444e-06, + "loss": 0.4447, "step": 1641 }, { - "epoch": 0.17, - "grad_norm": 2.40289964027477, - "learning_rate": 9.475391868464725e-06, - "loss": 0.7134, + "epoch": 0.12, + "grad_norm": 1.789268403930533, + "learning_rate": 9.805046496874974e-06, + "loss": 0.6743, "step": 1642 }, { - "epoch": 0.17, - "grad_norm": 2.387400572157838, - "learning_rate": 9.474631673620558e-06, - "loss": 0.7249, + "epoch": 0.12, + "grad_norm": 1.7367432836619436, + "learning_rate": 9.804728606856666e-06, + "loss": 0.5984, "step": 1643 }, { - "epoch": 0.17, - "grad_norm": 2.470393309910611, - "learning_rate": 9.47387095892662e-06, - "loss": 0.7756, + "epoch": 0.12, + "grad_norm": 1.7749041284997291, + "learning_rate": 9.804410463036313e-06, + "loss": 0.6539, "step": 1644 }, { - "epoch": 0.17, - "grad_norm": 2.318559436180576, - "learning_rate": 9.473109724471287e-06, - "loss": 0.6502, + "epoch": 0.12, + "grad_norm": 1.7572920048536245, + "learning_rate": 9.80409206543072e-06, + "loss": 0.643, "step": 1645 }, { - "epoch": 0.17, - "grad_norm": 2.5401989407597005, - "learning_rate": 9.472347970342995e-06, - "loss": 0.6817, + "epoch": 0.12, + "grad_norm": 0.9562006887458541, + "learning_rate": 9.803773414056703e-06, + "loss": 0.4529, "step": 1646 }, { - "epoch": 0.17, - "grad_norm": 3.2594402322006175, - "learning_rate": 9.471585696630245e-06, - "loss": 0.7104, + "epoch": 0.12, + "grad_norm": 2.036531148913945, + "learning_rate": 9.8034545089311e-06, + "loss": 0.6162, "step": 1647 }, { - "epoch": 0.17, - "grad_norm": 1.1769351591530914, - "learning_rate": 9.470822903421595e-06, - "loss": 0.6472, + "epoch": 0.12, + "grad_norm": 1.8886877480460629, + "learning_rate": 9.803135350070752e-06, + "loss": 0.5699, "step": 1648 }, { - "epoch": 0.17, - "grad_norm": 2.540121387003377, - "learning_rate": 9.470059590805663e-06, - "loss": 0.715, + "epoch": 0.12, + "grad_norm": 1.712058000118822, + "learning_rate": 9.802815937492518e-06, + "loss": 0.6111, "step": 1649 }, { - "epoch": 0.17, - "grad_norm": 2.3993005705838404, - "learning_rate": 9.46929575887113e-06, - "loss": 0.7426, + "epoch": 0.12, + "grad_norm": 1.69819920057338, + "learning_rate": 9.802496271213276e-06, + "loss": 0.5954, "step": 1650 }, { - "epoch": 0.17, - "grad_norm": 2.4076747230692574, - "learning_rate": 9.468531407706733e-06, - "loss": 0.6518, + "epoch": 0.12, + "grad_norm": 1.5828689901871047, + "learning_rate": 9.802176351249905e-06, + "loss": 0.5664, "step": 1651 }, { - "epoch": 0.17, - "grad_norm": 2.3345495527645443, - "learning_rate": 9.467766537401278e-06, - "loss": 0.6538, + "epoch": 0.12, + "grad_norm": 1.7122568104346672, + "learning_rate": 9.801856177619307e-06, + "loss": 0.5544, "step": 1652 }, { - "epoch": 0.17, - "grad_norm": 2.2658417056945064, - "learning_rate": 9.46700114804362e-06, - "loss": 0.6715, + "epoch": 0.12, + "grad_norm": 1.6101887488009492, + "learning_rate": 9.801535750338395e-06, + "loss": 0.6017, "step": 1653 }, { - "epoch": 0.17, - "grad_norm": 3.2719118199932598, - "learning_rate": 9.46623523972268e-06, - "loss": 0.6762, + "epoch": 0.12, + "grad_norm": 0.8770295904184275, + "learning_rate": 9.801215069424097e-06, + "loss": 0.4734, "step": 1654 }, { - "epoch": 0.17, - "grad_norm": 3.0582788277872037, - "learning_rate": 9.465468812527443e-06, - "loss": 0.629, + "epoch": 0.12, + "grad_norm": 2.5538497684752905, + "learning_rate": 9.800894134893348e-06, + "loss": 0.5859, "step": 1655 }, { - "epoch": 0.17, - "grad_norm": 2.247235978920937, - "learning_rate": 9.464701866546945e-06, - "loss": 0.7408, + "epoch": 0.12, + "grad_norm": 1.773759860776207, + "learning_rate": 9.800572946763104e-06, + "loss": 0.5987, "step": 1656 }, { - "epoch": 0.17, - "grad_norm": 3.155468792797194, - "learning_rate": 9.463934401870292e-06, - "loss": 0.613, + "epoch": 0.12, + "grad_norm": 0.951090288170339, + "learning_rate": 9.800251505050331e-06, + "loss": 0.4638, "step": 1657 }, { - "epoch": 0.17, - "grad_norm": 2.544248700229081, - "learning_rate": 9.463166418586645e-06, - "loss": 0.7269, + "epoch": 0.12, + "grad_norm": 2.364074739333842, + "learning_rate": 9.799929809772007e-06, + "loss": 0.6612, "step": 1658 }, { - "epoch": 0.17, - "grad_norm": 2.3313482073212315, - "learning_rate": 9.462397916785222e-06, - "loss": 0.7243, + "epoch": 0.12, + "grad_norm": 2.4646801114604338, + "learning_rate": 9.799607860945126e-06, + "loss": 0.5699, "step": 1659 }, { - "epoch": 0.17, - "grad_norm": 2.82456167117737, - "learning_rate": 9.461628896555312e-06, - "loss": 0.7121, + "epoch": 0.12, + "grad_norm": 1.8673764833004431, + "learning_rate": 9.799285658586695e-06, + "loss": 0.6009, "step": 1660 }, { - "epoch": 0.17, - "grad_norm": 5.726685554503556, - "learning_rate": 9.460859357986251e-06, - "loss": 0.6316, + "epoch": 0.12, + "grad_norm": 1.6815522093024686, + "learning_rate": 9.798963202713734e-06, + "loss": 0.5708, "step": 1661 }, { - "epoch": 0.17, - "grad_norm": 3.542875855778497, - "learning_rate": 9.460089301167448e-06, - "loss": 0.6625, + "epoch": 0.12, + "grad_norm": 1.9845032090945882, + "learning_rate": 9.798640493343274e-06, + "loss": 0.5391, "step": 1662 }, { - "epoch": 0.17, - "grad_norm": 2.86229240642419, - "learning_rate": 9.45931872618836e-06, - "loss": 0.6818, + "epoch": 0.12, + "grad_norm": 1.6520694580596809, + "learning_rate": 9.798317530492365e-06, + "loss": 0.5726, "step": 1663 }, { - "epoch": 0.18, - "grad_norm": 2.656197150207348, - "learning_rate": 9.458547633138515e-06, - "loss": 0.6801, + "epoch": 0.12, + "grad_norm": 1.5480146687315741, + "learning_rate": 9.797994314178063e-06, + "loss": 0.5282, "step": 1664 }, { - "epoch": 0.18, - "grad_norm": 3.243631283233658, - "learning_rate": 9.457776022107494e-06, - "loss": 0.6679, + "epoch": 0.12, + "grad_norm": 1.8950087085896452, + "learning_rate": 9.797670844417444e-06, + "loss": 0.582, "step": 1665 }, { - "epoch": 0.18, - "grad_norm": 2.5293787058884636, - "learning_rate": 9.45700389318494e-06, - "loss": 0.7386, + "epoch": 0.12, + "grad_norm": 2.0085361032706666, + "learning_rate": 9.797347121227595e-06, + "loss": 0.596, "step": 1666 }, { - "epoch": 0.18, - "grad_norm": 2.310070540436612, - "learning_rate": 9.456231246460557e-06, - "loss": 0.774, + "epoch": 0.12, + "grad_norm": 2.031869936183376, + "learning_rate": 9.797023144625615e-06, + "loss": 0.6443, "step": 1667 }, { - "epoch": 0.18, - "grad_norm": 2.508606440342495, - "learning_rate": 9.455458082024112e-06, - "loss": 0.7392, + "epoch": 0.12, + "grad_norm": 2.0467287298403156, + "learning_rate": 9.796698914628618e-06, + "loss": 0.638, "step": 1668 }, { - "epoch": 0.18, - "grad_norm": 2.2920775313114405, - "learning_rate": 9.454684399965423e-06, - "loss": 0.758, + "epoch": 0.12, + "grad_norm": 4.6093371696203596, + "learning_rate": 9.79637443125373e-06, + "loss": 0.5975, "step": 1669 }, { - "epoch": 0.18, - "grad_norm": 9.041893234843545, - "learning_rate": 9.453910200374382e-06, - "loss": 0.6369, + "epoch": 0.12, + "grad_norm": 1.7774429536077863, + "learning_rate": 9.796049694518095e-06, + "loss": 0.6005, "step": 1670 }, { - "epoch": 0.18, - "grad_norm": 1.2931690055491833, - "learning_rate": 9.453135483340925e-06, - "loss": 0.6547, + "epoch": 0.12, + "grad_norm": 3.2054433318233624, + "learning_rate": 9.795724704438861e-06, + "loss": 0.5675, "step": 1671 }, { - "epoch": 0.18, - "grad_norm": 1.3115301263653227, - "learning_rate": 9.452360248955062e-06, - "loss": 0.6449, + "epoch": 0.12, + "grad_norm": 0.9681266789265309, + "learning_rate": 9.7953994610332e-06, + "loss": 0.4546, "step": 1672 }, { - "epoch": 0.18, - "grad_norm": 2.9637645722219323, - "learning_rate": 9.451584497306856e-06, - "loss": 0.6999, + "epoch": 0.12, + "grad_norm": 1.6134086807317816, + "learning_rate": 9.79507396431829e-06, + "loss": 0.535, "step": 1673 }, { - "epoch": 0.18, - "grad_norm": 2.3068845068680326, - "learning_rate": 9.45080822848643e-06, - "loss": 0.6948, + "epoch": 0.12, + "grad_norm": 2.9171573974938694, + "learning_rate": 9.794748214311324e-06, + "loss": 0.6382, "step": 1674 }, { - "epoch": 0.18, - "grad_norm": 2.2231756130525673, - "learning_rate": 9.45003144258397e-06, - "loss": 0.6791, + "epoch": 0.12, + "grad_norm": 1.6118727491446523, + "learning_rate": 9.794422211029511e-06, + "loss": 0.4913, "step": 1675 }, { - "epoch": 0.18, - "grad_norm": 3.320104844855623, - "learning_rate": 9.449254139689721e-06, - "loss": 0.6537, + "epoch": 0.12, + "grad_norm": 2.3586547827431494, + "learning_rate": 9.79409595449007e-06, + "loss": 0.5877, "step": 1676 }, { - "epoch": 0.18, - "grad_norm": 2.471206074285933, - "learning_rate": 9.448476319893989e-06, - "loss": 0.7204, + "epoch": 0.12, + "grad_norm": 1.717249667473596, + "learning_rate": 9.793769444710239e-06, + "loss": 0.5975, "step": 1677 }, { - "epoch": 0.18, - "grad_norm": 2.4573874653738934, - "learning_rate": 9.447697983287136e-06, - "loss": 0.6883, + "epoch": 0.12, + "grad_norm": 1.757111355760766, + "learning_rate": 9.793442681707261e-06, + "loss": 0.5307, "step": 1678 }, { - "epoch": 0.18, - "grad_norm": 2.2768018435285424, - "learning_rate": 9.446919129959589e-06, - "loss": 0.6646, + "epoch": 0.12, + "grad_norm": 1.6920483677746456, + "learning_rate": 9.793115665498398e-06, + "loss": 0.5652, "step": 1679 }, { - "epoch": 0.18, - "grad_norm": 2.5304009939179433, - "learning_rate": 9.44613976000183e-06, - "loss": 0.6923, + "epoch": 0.12, + "grad_norm": 1.7033990317954792, + "learning_rate": 9.792788396100923e-06, + "loss": 0.5937, "step": 1680 }, { - "epoch": 0.18, - "grad_norm": 2.3183304739700534, - "learning_rate": 9.44535987350441e-06, - "loss": 0.82, + "epoch": 0.12, + "grad_norm": 1.8810573999902531, + "learning_rate": 9.792460873532128e-06, + "loss": 0.5032, "step": 1681 }, { - "epoch": 0.18, - "grad_norm": 2.293573173274393, - "learning_rate": 9.44457947055793e-06, - "loss": 0.7597, + "epoch": 0.12, + "grad_norm": 1.9533958696397018, + "learning_rate": 9.792133097809308e-06, + "loss": 0.558, "step": 1682 }, { - "epoch": 0.18, - "grad_norm": 2.820715534237888, - "learning_rate": 9.443798551253052e-06, - "loss": 0.7216, + "epoch": 0.12, + "grad_norm": 1.8146314482778534, + "learning_rate": 9.79180506894978e-06, + "loss": 0.5705, "step": 1683 }, { - "epoch": 0.18, - "grad_norm": 2.122577759545682, - "learning_rate": 9.443017115680503e-06, - "loss": 0.7303, + "epoch": 0.12, + "grad_norm": 1.75601315195119, + "learning_rate": 9.791476786970873e-06, + "loss": 0.6213, "step": 1684 }, { - "epoch": 0.18, - "grad_norm": 2.653511448510485, - "learning_rate": 9.442235163931072e-06, - "loss": 0.6314, + "epoch": 0.12, + "grad_norm": 1.6628949380902125, + "learning_rate": 9.791148251889924e-06, + "loss": 0.6026, "step": 1685 }, { - "epoch": 0.18, - "grad_norm": 2.186415024401008, - "learning_rate": 9.441452696095601e-06, - "loss": 0.755, + "epoch": 0.12, + "grad_norm": 2.1787093040919747, + "learning_rate": 9.790819463724292e-06, + "loss": 0.5719, "step": 1686 }, { - "epoch": 0.18, - "grad_norm": 2.3274053893757563, - "learning_rate": 9.440669712264994e-06, - "loss": 0.7157, + "epoch": 0.12, + "grad_norm": 1.9656340111868644, + "learning_rate": 9.790490422491342e-06, + "loss": 0.5465, "step": 1687 }, { - "epoch": 0.18, - "grad_norm": 1.9206038169379611, - "learning_rate": 9.439886212530217e-06, - "loss": 0.7278, + "epoch": 0.12, + "grad_norm": 2.0233460014282256, + "learning_rate": 9.790161128208453e-06, + "loss": 0.6109, "step": 1688 }, { - "epoch": 0.18, - "grad_norm": 2.974321572145424, - "learning_rate": 9.439102196982292e-06, - "loss": 0.6442, + "epoch": 0.12, + "grad_norm": 2.219591199813304, + "learning_rate": 9.789831580893024e-06, + "loss": 0.5704, "step": 1689 }, { - "epoch": 0.18, - "grad_norm": 1.9034939575257075, - "learning_rate": 9.438317665712308e-06, - "loss": 0.7244, + "epoch": 0.12, + "grad_norm": 1.5528507550985806, + "learning_rate": 9.78950178056246e-06, + "loss": 0.539, "step": 1690 }, { - "epoch": 0.18, - "grad_norm": 2.5630336170505945, - "learning_rate": 9.437532618811407e-06, - "loss": 0.7238, + "epoch": 0.12, + "grad_norm": 1.793121006620478, + "learning_rate": 9.789171727234184e-06, + "loss": 0.6452, "step": 1691 }, { - "epoch": 0.18, - "grad_norm": 2.7600588498607768, - "learning_rate": 9.436747056370794e-06, - "loss": 0.652, + "epoch": 0.12, + "grad_norm": 1.812612359250488, + "learning_rate": 9.78884142092563e-06, + "loss": 0.5684, "step": 1692 }, { - "epoch": 0.18, - "grad_norm": 2.4504076582596195, - "learning_rate": 9.435960978481734e-06, - "loss": 0.7314, + "epoch": 0.12, + "grad_norm": 1.981753155531472, + "learning_rate": 9.788510861654244e-06, + "loss": 0.5224, "step": 1693 }, { - "epoch": 0.18, - "grad_norm": 2.51073812241736, - "learning_rate": 9.435174385235548e-06, - "loss": 0.7227, + "epoch": 0.12, + "grad_norm": 1.8328008107321687, + "learning_rate": 9.788180049437487e-06, + "loss": 0.6294, "step": 1694 }, { - "epoch": 0.18, - "grad_norm": 2.846356911208405, - "learning_rate": 9.434387276723624e-06, - "loss": 0.6857, + "epoch": 0.12, + "grad_norm": 1.016448791561159, + "learning_rate": 9.787848984292838e-06, + "loss": 0.4823, "step": 1695 }, { - "epoch": 0.18, - "grad_norm": 2.703615406650167, - "learning_rate": 9.433599653037406e-06, - "loss": 0.5352, + "epoch": 0.12, + "grad_norm": 1.606306438319079, + "learning_rate": 9.78751766623778e-06, + "loss": 0.5298, "step": 1696 }, { - "epoch": 0.18, - "grad_norm": 2.3457312870011955, - "learning_rate": 9.432811514268396e-06, - "loss": 0.7758, + "epoch": 0.12, + "grad_norm": 1.7539288028720381, + "learning_rate": 9.787186095289818e-06, + "loss": 0.5923, "step": 1697 }, { - "epoch": 0.18, - "grad_norm": 2.271626563468226, - "learning_rate": 9.432022860508158e-06, - "loss": 0.7422, + "epoch": 0.12, + "grad_norm": 1.7902307876041041, + "learning_rate": 9.786854271466465e-06, + "loss": 0.6406, "step": 1698 }, { - "epoch": 0.18, - "grad_norm": 2.3311293386557175, - "learning_rate": 9.431233691848316e-06, - "loss": 0.6922, + "epoch": 0.12, + "grad_norm": 8.767421809537648, + "learning_rate": 9.78652219478525e-06, + "loss": 0.6653, "step": 1699 }, { - "epoch": 0.18, - "grad_norm": 3.13275822233704, - "learning_rate": 9.430444008380553e-06, - "loss": 0.6746, + "epoch": 0.12, + "grad_norm": 1.6772022879558186, + "learning_rate": 9.786189865263713e-06, + "loss": 0.5475, "step": 1700 }, { - "epoch": 0.18, - "grad_norm": 2.3910164531032594, - "learning_rate": 9.429653810196611e-06, - "loss": 0.7156, + "epoch": 0.12, + "grad_norm": 1.609551122426434, + "learning_rate": 9.785857282919409e-06, + "loss": 0.5994, "step": 1701 }, { - "epoch": 0.18, - "grad_norm": 2.25390169947036, - "learning_rate": 9.428863097388295e-06, - "loss": 0.7435, + "epoch": 0.12, + "grad_norm": 2.1307515829363926, + "learning_rate": 9.785524447769907e-06, + "loss": 0.5248, "step": 1702 }, { - "epoch": 0.18, - "grad_norm": 1.9560405228811595, - "learning_rate": 9.428071870047469e-06, - "loss": 0.7, + "epoch": 0.12, + "grad_norm": 2.447965777138737, + "learning_rate": 9.785191359832789e-06, + "loss": 0.5236, "step": 1703 }, { - "epoch": 0.18, - "grad_norm": 1.8824075331234948, - "learning_rate": 9.427280128266049e-06, - "loss": 0.7596, + "epoch": 0.12, + "grad_norm": 1.6619264478435938, + "learning_rate": 9.784858019125647e-06, + "loss": 0.533, "step": 1704 }, { - "epoch": 0.18, - "grad_norm": 2.286053461352808, - "learning_rate": 9.426487872136025e-06, - "loss": 0.7239, + "epoch": 0.12, + "grad_norm": 1.5915751748013487, + "learning_rate": 9.784524425666092e-06, + "loss": 0.5632, "step": 1705 }, { - "epoch": 0.18, - "grad_norm": 2.008603418544589, - "learning_rate": 9.425695101749435e-06, - "loss": 0.7413, + "epoch": 0.12, + "grad_norm": 1.6637265226514524, + "learning_rate": 9.784190579471746e-06, + "loss": 0.5771, "step": 1706 }, { - "epoch": 0.18, - "grad_norm": 2.10028582452023, - "learning_rate": 9.424901817198381e-06, - "loss": 0.6829, + "epoch": 0.12, + "grad_norm": 2.1892585503153517, + "learning_rate": 9.78385648056024e-06, + "loss": 0.6139, "step": 1707 }, { - "epoch": 0.18, - "grad_norm": 2.1535649860037007, - "learning_rate": 9.424108018575026e-06, - "loss": 0.7089, + "epoch": 0.12, + "grad_norm": 2.2017523093281706, + "learning_rate": 9.783522128949227e-06, + "loss": 0.5735, "step": 1708 }, { - "epoch": 0.18, - "grad_norm": 2.8660284485190903, - "learning_rate": 9.42331370597159e-06, - "loss": 0.7544, + "epoch": 0.12, + "grad_norm": 1.8500997631793252, + "learning_rate": 9.783187524656366e-06, + "loss": 0.6108, "step": 1709 }, { - "epoch": 0.18, - "grad_norm": 2.5095078891354508, - "learning_rate": 9.422518879480353e-06, - "loss": 0.6405, + "epoch": 0.12, + "grad_norm": 1.6286744618554194, + "learning_rate": 9.782852667699331e-06, + "loss": 0.6234, "step": 1710 }, { - "epoch": 0.18, - "grad_norm": 2.1188014421663874, - "learning_rate": 9.421723539193657e-06, - "loss": 0.6818, + "epoch": 0.12, + "grad_norm": 1.6612408748054623, + "learning_rate": 9.782517558095814e-06, + "loss": 0.5912, "step": 1711 }, { - "epoch": 0.18, - "grad_norm": 3.163280526338815, - "learning_rate": 9.420927685203901e-06, - "loss": 0.6727, + "epoch": 0.12, + "grad_norm": 0.980804823788116, + "learning_rate": 9.782182195863513e-06, + "loss": 0.4386, "step": 1712 }, { - "epoch": 0.18, - "grad_norm": 3.7994284590304366, - "learning_rate": 9.42013131760355e-06, - "loss": 0.6459, + "epoch": 0.12, + "grad_norm": 2.6432678732711907, + "learning_rate": 9.781846581020142e-06, + "loss": 0.6171, "step": 1713 }, { - "epoch": 0.18, - "grad_norm": 2.098234987273422, - "learning_rate": 9.419334436485117e-06, - "loss": 0.7406, + "epoch": 0.12, + "grad_norm": 1.6488865030594788, + "learning_rate": 9.781510713583433e-06, + "loss": 0.5513, "step": 1714 }, { - "epoch": 0.18, - "grad_norm": 2.204421750962229, - "learning_rate": 9.418537041941185e-06, - "loss": 0.75, + "epoch": 0.12, + "grad_norm": 1.9297626843937195, + "learning_rate": 9.781174593571127e-06, + "loss": 0.626, "step": 1715 }, { - "epoch": 0.18, - "grad_norm": 2.551866812050452, - "learning_rate": 9.417739134064392e-06, - "loss": 0.7352, + "epoch": 0.12, + "grad_norm": 2.4946679741702518, + "learning_rate": 9.78083822100098e-06, + "loss": 0.5752, "step": 1716 }, { - "epoch": 0.18, - "grad_norm": 2.1052536649230147, - "learning_rate": 9.416940712947436e-06, - "loss": 0.7457, + "epoch": 0.12, + "grad_norm": 1.622967771019889, + "learning_rate": 9.780501595890755e-06, + "loss": 0.5999, "step": 1717 }, { - "epoch": 0.18, - "grad_norm": 2.552123073690243, - "learning_rate": 9.416141778683077e-06, - "loss": 0.7008, + "epoch": 0.12, + "grad_norm": 1.5030803376976007, + "learning_rate": 9.780164718258238e-06, + "loss": 0.5073, "step": 1718 }, { - "epoch": 0.18, - "grad_norm": 2.386726881220311, - "learning_rate": 9.415342331364132e-06, - "loss": 0.6845, + "epoch": 0.12, + "grad_norm": 1.9132037930952, + "learning_rate": 9.779827588121223e-06, + "loss": 0.5213, "step": 1719 }, { - "epoch": 0.18, - "grad_norm": 2.0231356594407064, - "learning_rate": 9.414542371083477e-06, - "loss": 0.6269, + "epoch": 0.12, + "grad_norm": 1.5061665763363306, + "learning_rate": 9.77949020549752e-06, + "loss": 0.5955, "step": 1720 }, { - "epoch": 0.18, - "grad_norm": 3.3944077369294376, - "learning_rate": 9.413741897934052e-06, - "loss": 0.7178, + "epoch": 0.12, + "grad_norm": 2.751075858961627, + "learning_rate": 9.779152570404948e-06, + "loss": 0.5999, "step": 1721 }, { - "epoch": 0.18, - "grad_norm": 2.4163679275832997, - "learning_rate": 9.412940912008852e-06, - "loss": 0.6554, + "epoch": 0.12, + "grad_norm": 1.4720862941923512, + "learning_rate": 9.778814682861344e-06, + "loss": 0.6689, "step": 1722 }, { - "epoch": 0.18, - "grad_norm": 2.1677504987549554, - "learning_rate": 9.412139413400933e-06, - "loss": 0.7565, + "epoch": 0.12, + "grad_norm": 2.030438264972945, + "learning_rate": 9.778476542884553e-06, + "loss": 0.6117, "step": 1723 }, { - "epoch": 0.18, - "grad_norm": 8.628059217990637, - "learning_rate": 9.41133740220341e-06, - "loss": 0.6513, + "epoch": 0.12, + "grad_norm": 2.1218447446264057, + "learning_rate": 9.778138150492441e-06, + "loss": 0.5829, "step": 1724 }, { - "epoch": 0.18, - "grad_norm": 2.2085858844991972, - "learning_rate": 9.410534878509461e-06, - "loss": 0.6636, + "epoch": 0.12, + "grad_norm": 1.601387980763318, + "learning_rate": 9.777799505702883e-06, + "loss": 0.5979, "step": 1725 }, { - "epoch": 0.18, - "grad_norm": 1.9079694945157286, - "learning_rate": 9.40973184241232e-06, - "loss": 0.6428, + "epoch": 0.12, + "grad_norm": 1.9026227421275081, + "learning_rate": 9.777460608533765e-06, + "loss": 0.5445, "step": 1726 }, { - "epoch": 0.18, - "grad_norm": 2.2280940372738156, - "learning_rate": 9.408928294005279e-06, - "loss": 0.6991, + "epoch": 0.12, + "grad_norm": 1.7601491290801956, + "learning_rate": 9.777121459002987e-06, + "loss": 0.4724, "step": 1727 }, { - "epoch": 0.18, - "grad_norm": 2.2464560200093553, - "learning_rate": 9.408124233381695e-06, - "loss": 0.7057, + "epoch": 0.12, + "grad_norm": 1.5833284767521465, + "learning_rate": 9.77678205712847e-06, + "loss": 0.5658, "step": 1728 }, { - "epoch": 0.18, - "grad_norm": 2.41547778019205, - "learning_rate": 9.40731966063498e-06, - "loss": 0.7175, + "epoch": 0.12, + "grad_norm": 1.7589918083326543, + "learning_rate": 9.776442402928137e-06, + "loss": 0.6454, "step": 1729 }, { - "epoch": 0.18, - "grad_norm": 1.9493683126801489, - "learning_rate": 9.406514575858606e-06, - "loss": 0.6848, + "epoch": 0.12, + "grad_norm": 1.9417152826057664, + "learning_rate": 9.77610249641993e-06, + "loss": 0.5311, "step": 1730 }, { - "epoch": 0.18, - "grad_norm": 2.653450440842846, - "learning_rate": 9.405708979146106e-06, - "loss": 0.7155, + "epoch": 0.12, + "grad_norm": 1.62205089531339, + "learning_rate": 9.775762337621808e-06, + "loss": 0.6092, "step": 1731 }, { - "epoch": 0.18, - "grad_norm": 2.2614446312235077, - "learning_rate": 9.404902870591076e-06, - "loss": 0.7237, + "epoch": 0.12, + "grad_norm": 1.5843787918497383, + "learning_rate": 9.775421926551737e-06, + "loss": 0.538, "step": 1732 }, { - "epoch": 0.18, - "grad_norm": 2.236025418767225, - "learning_rate": 9.40409625028716e-06, - "loss": 0.7383, + "epoch": 0.12, + "grad_norm": 1.4691147932554922, + "learning_rate": 9.775081263227696e-06, + "loss": 0.562, "step": 1733 }, { - "epoch": 0.18, - "grad_norm": 2.0068155617046903, - "learning_rate": 9.403289118328074e-06, - "loss": 0.624, + "epoch": 0.12, + "grad_norm": 1.712318444821895, + "learning_rate": 9.774740347667684e-06, + "loss": 0.6503, "step": 1734 }, { - "epoch": 0.18, - "grad_norm": 2.586872590910335, - "learning_rate": 9.402481474807588e-06, - "loss": 0.6978, + "epoch": 0.12, + "grad_norm": 1.76385220329107, + "learning_rate": 9.774399179889708e-06, + "loss": 0.5756, "step": 1735 }, { - "epoch": 0.18, - "grad_norm": 2.05535830543558, - "learning_rate": 9.401673319819529e-06, - "loss": 0.6669, + "epoch": 0.12, + "grad_norm": 1.8109453036453846, + "learning_rate": 9.774057759911788e-06, + "loss": 0.5844, "step": 1736 }, { - "epoch": 0.18, - "grad_norm": 1.8884455652664238, - "learning_rate": 9.400864653457789e-06, - "loss": 0.697, + "epoch": 0.12, + "grad_norm": 0.9796204919772943, + "learning_rate": 9.773716087751963e-06, + "loss": 0.4684, "step": 1737 }, { - "epoch": 0.18, - "grad_norm": 2.1851392337369266, - "learning_rate": 9.400055475816313e-06, - "loss": 0.6524, + "epoch": 0.12, + "grad_norm": 1.7391490141610613, + "learning_rate": 9.773374163428276e-06, + "loss": 0.5618, "step": 1738 }, { - "epoch": 0.18, - "grad_norm": 2.576173165092392, - "learning_rate": 9.399245786989112e-06, - "loss": 0.7119, + "epoch": 0.12, + "grad_norm": 1.6642568469827328, + "learning_rate": 9.773031986958791e-06, + "loss": 0.5926, "step": 1739 }, { - "epoch": 0.18, - "grad_norm": 2.620809902632731, - "learning_rate": 9.398435587070254e-06, - "loss": 0.7611, + "epoch": 0.12, + "grad_norm": 1.907366466457657, + "learning_rate": 9.772689558361585e-06, + "loss": 0.5525, "step": 1740 }, { - "epoch": 0.18, - "grad_norm": 2.8033777586962114, - "learning_rate": 9.397624876153862e-06, - "loss": 0.6888, + "epoch": 0.12, + "grad_norm": 1.9133541465407675, + "learning_rate": 9.772346877654746e-06, + "loss": 0.5691, "step": 1741 }, { - "epoch": 0.18, - "grad_norm": 2.1952968707428666, - "learning_rate": 9.396813654334124e-06, - "loss": 0.729, + "epoch": 0.12, + "grad_norm": 1.7544241640946543, + "learning_rate": 9.772003944856371e-06, + "loss": 0.6077, "step": 1742 }, { - "epoch": 0.18, - "grad_norm": 2.184788154360516, - "learning_rate": 9.396001921705287e-06, - "loss": 0.7522, + "epoch": 0.12, + "grad_norm": 1.7163307483405952, + "learning_rate": 9.77166075998458e-06, + "loss": 0.5678, "step": 1743 }, { - "epoch": 0.18, - "grad_norm": 2.5561173456941186, - "learning_rate": 9.395189678361655e-06, - "loss": 0.6819, + "epoch": 0.12, + "grad_norm": 1.5364558271019502, + "learning_rate": 9.771317323057498e-06, + "loss": 0.57, "step": 1744 }, { - "epoch": 0.18, - "grad_norm": 2.9560468612635016, - "learning_rate": 9.39437692439759e-06, - "loss": 0.7499, + "epoch": 0.12, + "grad_norm": 0.8491838709009322, + "learning_rate": 9.770973634093267e-06, + "loss": 0.4801, "step": 1745 }, { - "epoch": 0.18, - "grad_norm": 3.170377346969926, - "learning_rate": 9.393563659907516e-06, - "loss": 0.7307, + "epoch": 0.12, + "grad_norm": 0.9488322829777677, + "learning_rate": 9.770629693110042e-06, + "loss": 0.4862, "step": 1746 }, { - "epoch": 0.18, - "grad_norm": 2.0529367396092386, - "learning_rate": 9.392749884985918e-06, - "loss": 0.6618, + "epoch": 0.12, + "grad_norm": 1.5608502494769665, + "learning_rate": 9.770285500125993e-06, + "loss": 0.6013, "step": 1747 }, { - "epoch": 0.18, - "grad_norm": 2.5770411994159867, - "learning_rate": 9.391935599727336e-06, - "loss": 0.702, + "epoch": 0.12, + "grad_norm": 1.9447417433967484, + "learning_rate": 9.7699410551593e-06, + "loss": 0.6285, "step": 1748 }, { - "epoch": 0.18, - "grad_norm": 2.1159695596217305, - "learning_rate": 9.391120804226372e-06, - "loss": 0.7339, + "epoch": 0.12, + "grad_norm": 1.6900212055833288, + "learning_rate": 9.769596358228158e-06, + "loss": 0.5384, "step": 1749 }, { - "epoch": 0.18, - "grad_norm": 2.9557470831906363, - "learning_rate": 9.390305498577685e-06, - "loss": 0.829, + "epoch": 0.12, + "grad_norm": 2.3974428956127847, + "learning_rate": 9.769251409350773e-06, + "loss": 0.6307, "step": 1750 }, { - "epoch": 0.18, - "grad_norm": 2.066429118959667, - "learning_rate": 9.389489682875999e-06, - "loss": 0.7344, + "epoch": 0.12, + "grad_norm": 1.6235126037514267, + "learning_rate": 9.76890620854537e-06, + "loss": 0.5694, "step": 1751 }, { - "epoch": 0.18, - "grad_norm": 2.3728866613105315, - "learning_rate": 9.388673357216088e-06, - "loss": 0.6821, + "epoch": 0.12, + "grad_norm": 1.8290281895591345, + "learning_rate": 9.768560755830181e-06, + "loss": 0.5884, "step": 1752 }, { - "epoch": 0.18, - "grad_norm": 2.5738421703420835, - "learning_rate": 9.387856521692795e-06, - "loss": 0.6881, + "epoch": 0.12, + "grad_norm": 1.7045940824613837, + "learning_rate": 9.768215051223454e-06, + "loss": 0.6855, "step": 1753 }, { - "epoch": 0.18, - "grad_norm": 2.2173777484723933, - "learning_rate": 9.387039176401013e-06, - "loss": 0.6497, + "epoch": 0.12, + "grad_norm": 1.6657670482430476, + "learning_rate": 9.767869094743451e-06, + "loss": 0.546, "step": 1754 }, { - "epoch": 0.18, - "grad_norm": 2.8326929223772916, - "learning_rate": 9.386221321435702e-06, - "loss": 0.7752, + "epoch": 0.12, + "grad_norm": 1.650841482514477, + "learning_rate": 9.767522886408449e-06, + "loss": 0.5633, "step": 1755 }, { - "epoch": 0.18, - "grad_norm": 2.342472475816738, - "learning_rate": 9.385402956891878e-06, - "loss": 0.6642, + "epoch": 0.12, + "grad_norm": 2.460697297290688, + "learning_rate": 9.767176426236731e-06, + "loss": 0.5462, "step": 1756 }, { - "epoch": 0.18, - "grad_norm": 1.8421908380492378, - "learning_rate": 9.384584082864614e-06, - "loss": 0.628, + "epoch": 0.12, + "grad_norm": 1.8564609757309207, + "learning_rate": 9.766829714246601e-06, + "loss": 0.5147, "step": 1757 }, { - "epoch": 0.18, - "grad_norm": 2.293083670289089, - "learning_rate": 9.383764699449047e-06, - "loss": 0.7642, + "epoch": 0.12, + "grad_norm": 2.0074975508382584, + "learning_rate": 9.766482750456374e-06, + "loss": 0.5542, "step": 1758 }, { - "epoch": 0.19, - "grad_norm": 3.1881257410006234, - "learning_rate": 9.382944806740369e-06, - "loss": 0.697, + "epoch": 0.12, + "grad_norm": 3.2097870773622716, + "learning_rate": 9.766135534884378e-06, + "loss": 0.6294, "step": 1759 }, { - "epoch": 0.19, - "grad_norm": 2.0454383098040125, - "learning_rate": 9.382124404833832e-06, - "loss": 0.6683, + "epoch": 0.12, + "grad_norm": 1.969401746085967, + "learning_rate": 9.765788067548952e-06, + "loss": 0.655, "step": 1760 }, { - "epoch": 0.19, - "grad_norm": 2.39746694551876, - "learning_rate": 9.38130349382475e-06, - "loss": 0.7014, + "epoch": 0.12, + "grad_norm": 0.9967869336526871, + "learning_rate": 9.765440348468452e-06, + "loss": 0.4917, "step": 1761 }, { - "epoch": 0.19, - "grad_norm": 2.646324899559431, - "learning_rate": 9.380482073808493e-06, - "loss": 0.6275, + "epoch": 0.13, + "grad_norm": 1.8080405259289378, + "learning_rate": 9.765092377661244e-06, + "loss": 0.6236, "step": 1762 }, { - "epoch": 0.19, - "grad_norm": 2.31148966058067, - "learning_rate": 9.379660144880491e-06, - "loss": 0.6855, + "epoch": 0.13, + "grad_norm": 2.001248753149921, + "learning_rate": 9.764744155145713e-06, + "loss": 0.6595, "step": 1763 }, { - "epoch": 0.19, - "grad_norm": 2.6610941993286072, - "learning_rate": 9.378837707136235e-06, - "loss": 0.721, + "epoch": 0.13, + "grad_norm": 1.8313497394619975, + "learning_rate": 9.764395680940248e-06, + "loss": 0.48, "step": 1764 }, { - "epoch": 0.19, - "grad_norm": 2.181626035362435, - "learning_rate": 9.37801476067127e-06, - "loss": 0.7382, + "epoch": 0.13, + "grad_norm": 0.903556680776509, + "learning_rate": 9.764046955063261e-06, + "loss": 0.4482, "step": 1765 }, { - "epoch": 0.19, - "grad_norm": 2.5121904009240774, - "learning_rate": 9.377191305581208e-06, - "loss": 0.6953, + "epoch": 0.13, + "grad_norm": 2.186807309459345, + "learning_rate": 9.763697977533172e-06, + "loss": 0.6006, "step": 1766 }, { - "epoch": 0.19, - "grad_norm": 2.324736532829736, - "learning_rate": 9.376367341961712e-06, - "loss": 0.6536, + "epoch": 0.13, + "grad_norm": 1.737625322769082, + "learning_rate": 9.763348748368411e-06, + "loss": 0.6262, "step": 1767 }, { - "epoch": 0.19, - "grad_norm": 2.1731590595672206, - "learning_rate": 9.375542869908509e-06, - "loss": 0.6795, + "epoch": 0.13, + "grad_norm": 0.9047360426339274, + "learning_rate": 9.76299926758743e-06, + "loss": 0.4639, "step": 1768 }, { - "epoch": 0.19, - "grad_norm": 2.0441280442692396, - "learning_rate": 9.374717889517384e-06, - "loss": 0.6707, + "epoch": 0.13, + "grad_norm": 1.649864737356665, + "learning_rate": 9.762649535208689e-06, + "loss": 0.5644, "step": 1769 }, { - "epoch": 0.19, - "grad_norm": 2.188945519966826, - "learning_rate": 9.373892400884182e-06, - "loss": 0.6427, + "epoch": 0.13, + "grad_norm": 1.8913121564534474, + "learning_rate": 9.76229955125066e-06, + "loss": 0.5757, "step": 1770 }, { - "epoch": 0.19, - "grad_norm": 1.8292074188975915, - "learning_rate": 9.373066404104803e-06, - "loss": 0.6416, + "epoch": 0.13, + "grad_norm": 2.7826359725913457, + "learning_rate": 9.761949315731834e-06, + "loss": 0.5445, "step": 1771 }, { - "epoch": 0.19, - "grad_norm": 2.2353048574201884, - "learning_rate": 9.37223989927521e-06, - "loss": 0.7069, + "epoch": 0.13, + "grad_norm": 1.5926916333176009, + "learning_rate": 9.761598828670707e-06, + "loss": 0.5988, "step": 1772 }, { - "epoch": 0.19, - "grad_norm": 3.313904910526145, - "learning_rate": 9.371412886491424e-06, - "loss": 0.7505, + "epoch": 0.13, + "grad_norm": 2.150778943168815, + "learning_rate": 9.761248090085798e-06, + "loss": 0.5639, "step": 1773 }, { - "epoch": 0.19, - "grad_norm": 4.177329261346893, - "learning_rate": 9.370585365849527e-06, - "loss": 0.6287, + "epoch": 0.13, + "grad_norm": 1.5887044410235698, + "learning_rate": 9.76089709999563e-06, + "loss": 0.5251, "step": 1774 }, { - "epoch": 0.19, - "grad_norm": 2.509687395524509, - "learning_rate": 9.369757337445655e-06, - "loss": 0.6938, + "epoch": 0.13, + "grad_norm": 1.7625364747691568, + "learning_rate": 9.760545858418744e-06, + "loss": 0.6303, "step": 1775 }, { - "epoch": 0.19, - "grad_norm": 2.50959271620824, - "learning_rate": 9.368928801376009e-06, - "loss": 0.7594, + "epoch": 0.13, + "grad_norm": 1.606159875096524, + "learning_rate": 9.760194365373696e-06, + "loss": 0.5841, "step": 1776 }, { - "epoch": 0.19, - "grad_norm": 2.3425929238257943, - "learning_rate": 9.368099757736843e-06, - "loss": 0.6348, + "epoch": 0.13, + "grad_norm": 1.884117181246165, + "learning_rate": 9.759842620879053e-06, + "loss": 0.619, "step": 1777 }, { - "epoch": 0.19, - "grad_norm": 2.571462951149562, - "learning_rate": 9.367270206624474e-06, - "loss": 0.7839, + "epoch": 0.13, + "grad_norm": 2.049831972534073, + "learning_rate": 9.759490624953393e-06, + "loss": 0.5811, "step": 1778 }, { - "epoch": 0.19, - "grad_norm": 2.288592181156132, - "learning_rate": 9.366440148135276e-06, - "loss": 0.7086, + "epoch": 0.13, + "grad_norm": 1.5928390716809229, + "learning_rate": 9.759138377615311e-06, + "loss": 0.5798, "step": 1779 }, { - "epoch": 0.19, - "grad_norm": 3.7452409081529328, - "learning_rate": 9.365609582365685e-06, - "loss": 0.7787, + "epoch": 0.13, + "grad_norm": 1.8439494015108664, + "learning_rate": 9.758785878883413e-06, + "loss": 0.5735, "step": 1780 }, { - "epoch": 0.19, - "grad_norm": 2.188224299368682, - "learning_rate": 9.364778509412191e-06, - "loss": 0.7622, + "epoch": 0.13, + "grad_norm": 1.6746342225967408, + "learning_rate": 9.758433128776321e-06, + "loss": 0.536, "step": 1781 }, { - "epoch": 0.19, - "grad_norm": 2.7450685765521223, - "learning_rate": 9.363946929371349e-06, - "loss": 0.6863, + "epoch": 0.13, + "grad_norm": 1.705669000634515, + "learning_rate": 9.758080127312666e-06, + "loss": 0.5219, "step": 1782 }, { - "epoch": 0.19, - "grad_norm": 2.5045275357571284, - "learning_rate": 9.363114842339767e-06, - "loss": 0.7075, + "epoch": 0.13, + "grad_norm": 2.2142027678037226, + "learning_rate": 9.757726874511097e-06, + "loss": 0.6629, "step": 1783 }, { - "epoch": 0.19, - "grad_norm": 3.0164414465737837, - "learning_rate": 9.362282248414114e-06, - "loss": 0.6598, + "epoch": 0.13, + "grad_norm": 1.7625890345692965, + "learning_rate": 9.757373370390274e-06, + "loss": 0.6193, "step": 1784 }, { - "epoch": 0.19, - "grad_norm": 2.35492294691701, - "learning_rate": 9.361449147691122e-06, - "loss": 0.7235, + "epoch": 0.13, + "grad_norm": 2.0942723124065004, + "learning_rate": 9.75701961496887e-06, + "loss": 0.5819, "step": 1785 }, { - "epoch": 0.19, - "grad_norm": 2.2619503151970335, - "learning_rate": 9.360615540267572e-06, - "loss": 0.6997, + "epoch": 0.13, + "grad_norm": 1.7134856774426843, + "learning_rate": 9.75666560826557e-06, + "loss": 0.6335, "step": 1786 }, { - "epoch": 0.19, - "grad_norm": 2.5899599164267766, - "learning_rate": 9.359781426240316e-06, - "loss": 0.7257, + "epoch": 0.13, + "grad_norm": 1.8311358067731494, + "learning_rate": 9.756311350299073e-06, + "loss": 0.565, "step": 1787 }, { - "epoch": 0.19, - "grad_norm": 2.5634208857345366, - "learning_rate": 9.358946805706257e-06, - "loss": 0.664, + "epoch": 0.13, + "grad_norm": 1.6952227730767107, + "learning_rate": 9.755956841088096e-06, + "loss": 0.6534, "step": 1788 }, { - "epoch": 0.19, - "grad_norm": 4.51798972826991, - "learning_rate": 9.358111678762359e-06, - "loss": 0.6728, + "epoch": 0.13, + "grad_norm": 2.511588474741989, + "learning_rate": 9.755602080651363e-06, + "loss": 0.6159, "step": 1789 }, { - "epoch": 0.19, - "grad_norm": 2.3326773791425466, - "learning_rate": 9.357276045505643e-06, - "loss": 0.7358, + "epoch": 0.13, + "grad_norm": 1.6418722437803235, + "learning_rate": 9.755247069007611e-06, + "loss": 0.6116, "step": 1790 }, { - "epoch": 0.19, - "grad_norm": 2.2350420427602833, - "learning_rate": 9.35643990603319e-06, - "loss": 0.6856, + "epoch": 0.13, + "grad_norm": 1.8406873494696212, + "learning_rate": 9.754891806175599e-06, + "loss": 0.568, "step": 1791 }, { - "epoch": 0.19, - "grad_norm": 2.608868685684469, - "learning_rate": 9.355603260442145e-06, - "loss": 0.6177, + "epoch": 0.13, + "grad_norm": 1.513150197524101, + "learning_rate": 9.754536292174091e-06, + "loss": 0.5775, "step": 1792 }, { - "epoch": 0.19, - "grad_norm": 1.9713835504144617, - "learning_rate": 9.354766108829703e-06, - "loss": 0.7943, + "epoch": 0.13, + "grad_norm": 1.8038826352427186, + "learning_rate": 9.754180527021863e-06, + "loss": 0.6082, "step": 1793 }, { - "epoch": 0.19, - "grad_norm": 2.8062689555008857, - "learning_rate": 9.353928451293122e-06, - "loss": 0.6725, + "epoch": 0.13, + "grad_norm": 1.611546032342575, + "learning_rate": 9.753824510737711e-06, + "loss": 0.5236, "step": 1794 }, { - "epoch": 0.19, - "grad_norm": 2.671272851573433, - "learning_rate": 9.35309028792972e-06, - "loss": 0.6848, + "epoch": 0.13, + "grad_norm": 1.5351709866888281, + "learning_rate": 9.753468243340441e-06, + "loss": 0.6076, "step": 1795 }, { - "epoch": 0.19, - "grad_norm": 2.066156049708186, - "learning_rate": 9.352251618836872e-06, - "loss": 0.7521, + "epoch": 0.13, + "grad_norm": 1.6747702563689981, + "learning_rate": 9.75311172484887e-06, + "loss": 0.5674, "step": 1796 }, { - "epoch": 0.19, - "grad_norm": 3.0657191943549273, - "learning_rate": 9.351412444112013e-06, - "loss": 0.6063, + "epoch": 0.13, + "grad_norm": 1.648031312017971, + "learning_rate": 9.752754955281832e-06, + "loss": 0.6199, "step": 1797 }, { - "epoch": 0.19, - "grad_norm": 2.647445442676486, - "learning_rate": 9.350572763852633e-06, - "loss": 0.6984, + "epoch": 0.13, + "grad_norm": 1.5706558811508375, + "learning_rate": 9.752397934658174e-06, + "loss": 0.6213, "step": 1798 }, { - "epoch": 0.19, - "grad_norm": 2.2824264713651994, - "learning_rate": 9.349732578156286e-06, - "loss": 0.6747, + "epoch": 0.13, + "grad_norm": 2.35205738774739, + "learning_rate": 9.752040662996754e-06, + "loss": 0.5428, "step": 1799 }, { - "epoch": 0.19, - "grad_norm": 2.232278265988478, - "learning_rate": 9.348891887120582e-06, - "loss": 0.7481, + "epoch": 0.13, + "grad_norm": 1.7018239185476982, + "learning_rate": 9.751683140316441e-06, + "loss": 0.5652, "step": 1800 }, { - "epoch": 0.19, - "grad_norm": 2.2260356050576937, - "learning_rate": 9.348050690843192e-06, - "loss": 0.7101, + "epoch": 0.13, + "grad_norm": 1.853198227662673, + "learning_rate": 9.751325366636127e-06, + "loss": 0.5787, "step": 1801 }, { - "epoch": 0.19, - "grad_norm": 2.6245692650813734, - "learning_rate": 9.347208989421838e-06, - "loss": 0.6776, + "epoch": 0.13, + "grad_norm": 1.4283102049329293, + "learning_rate": 9.750967341974706e-06, + "loss": 0.5545, "step": 1802 }, { - "epoch": 0.19, - "grad_norm": 2.8932084963461766, - "learning_rate": 9.346366782954313e-06, - "loss": 0.7145, + "epoch": 0.13, + "grad_norm": 1.7892101508664149, + "learning_rate": 9.750609066351092e-06, + "loss": 0.5921, "step": 1803 }, { - "epoch": 0.19, - "grad_norm": 2.6293013765675806, - "learning_rate": 9.345524071538457e-06, - "loss": 0.762, + "epoch": 0.13, + "grad_norm": 1.9886044622953356, + "learning_rate": 9.75025053978421e-06, + "loss": 0.5352, "step": 1804 }, { - "epoch": 0.19, - "grad_norm": 2.4909478746794926, - "learning_rate": 9.344680855272178e-06, - "loss": 0.7797, + "epoch": 0.13, + "grad_norm": 1.790273329919268, + "learning_rate": 9.749891762292998e-06, + "loss": 0.5972, "step": 1805 }, { - "epoch": 0.19, - "grad_norm": 2.678911462141494, - "learning_rate": 9.343837134253434e-06, - "loss": 0.7017, + "epoch": 0.13, + "grad_norm": 1.483443222067405, + "learning_rate": 9.749532733896408e-06, + "loss": 0.5451, "step": 1806 }, { - "epoch": 0.19, - "grad_norm": 3.236697329924541, - "learning_rate": 9.342992908580252e-06, - "loss": 0.6629, + "epoch": 0.13, + "grad_norm": 1.5320377845397621, + "learning_rate": 9.749173454613405e-06, + "loss": 0.6303, "step": 1807 }, { - "epoch": 0.19, - "grad_norm": 2.6302188588519484, - "learning_rate": 9.342148178350705e-06, - "loss": 0.7272, + "epoch": 0.13, + "grad_norm": 1.5696239111847572, + "learning_rate": 9.74881392446297e-06, + "loss": 0.5424, "step": 1808 }, { - "epoch": 0.19, - "grad_norm": 3.3111476074297292, - "learning_rate": 9.341302943662937e-06, - "loss": 0.6723, + "epoch": 0.13, + "grad_norm": 1.9336253127248402, + "learning_rate": 9.74845414346409e-06, + "loss": 0.6305, "step": 1809 }, { - "epoch": 0.19, - "grad_norm": 2.9282424193071446, - "learning_rate": 9.34045720461514e-06, - "loss": 0.8171, + "epoch": 0.13, + "grad_norm": 1.1120712276236346, + "learning_rate": 9.748094111635774e-06, + "loss": 0.4822, "step": 1810 }, { - "epoch": 0.19, - "grad_norm": 2.9521986711914656, - "learning_rate": 9.339610961305575e-06, - "loss": 0.7266, + "epoch": 0.13, + "grad_norm": 0.9854686968662069, + "learning_rate": 9.747733828997036e-06, + "loss": 0.4793, "step": 1811 }, { - "epoch": 0.19, - "grad_norm": 2.6008867763760595, - "learning_rate": 9.33876421383255e-06, - "loss": 0.6991, + "epoch": 0.13, + "grad_norm": 1.6916323840798329, + "learning_rate": 9.747373295566911e-06, + "loss": 0.5994, "step": 1812 }, { - "epoch": 0.19, - "grad_norm": 2.4219540920578213, - "learning_rate": 9.337916962294443e-06, - "loss": 0.7751, + "epoch": 0.13, + "grad_norm": 1.7034161389557745, + "learning_rate": 9.747012511364442e-06, + "loss": 0.5634, "step": 1813 }, { - "epoch": 0.19, - "grad_norm": 2.3994526596884787, - "learning_rate": 9.337069206789681e-06, - "loss": 0.7132, + "epoch": 0.13, + "grad_norm": 2.471965522940496, + "learning_rate": 9.746651476408687e-06, + "loss": 0.4955, "step": 1814 }, { - "epoch": 0.19, - "grad_norm": 2.141697670450548, - "learning_rate": 9.336220947416757e-06, - "loss": 0.6879, + "epoch": 0.13, + "grad_norm": 2.319652145767398, + "learning_rate": 9.746290190718717e-06, + "loss": 0.5425, "step": 1815 }, { - "epoch": 0.19, - "grad_norm": 2.442700420921565, - "learning_rate": 9.335372184274219e-06, - "loss": 0.725, + "epoch": 0.13, + "grad_norm": 1.59692612732707, + "learning_rate": 9.745928654313617e-06, + "loss": 0.5992, "step": 1816 }, { - "epoch": 0.19, - "grad_norm": 2.9922843786433275, - "learning_rate": 9.334522917460671e-06, - "loss": 0.7077, + "epoch": 0.13, + "grad_norm": 2.968788286250123, + "learning_rate": 9.745566867212483e-06, + "loss": 0.6042, "step": 1817 }, { - "epoch": 0.19, - "grad_norm": 1.9847132878411518, - "learning_rate": 9.33367314707478e-06, - "loss": 0.6625, + "epoch": 0.13, + "grad_norm": 1.7823263910581828, + "learning_rate": 9.745204829434429e-06, + "loss": 0.6, "step": 1818 }, { - "epoch": 0.19, - "grad_norm": 2.2682583137130137, - "learning_rate": 9.332822873215273e-06, - "loss": 0.6533, + "epoch": 0.13, + "grad_norm": 1.6930016049551133, + "learning_rate": 9.744842540998576e-06, + "loss": 0.6289, "step": 1819 }, { - "epoch": 0.19, - "grad_norm": 2.1574351053442125, - "learning_rate": 9.331972095980927e-06, - "loss": 0.6908, + "epoch": 0.13, + "grad_norm": 3.118471169363396, + "learning_rate": 9.744480001924062e-06, + "loss": 0.6185, "step": 1820 }, { - "epoch": 0.19, - "grad_norm": 2.495175865873836, - "learning_rate": 9.331120815470586e-06, - "loss": 0.69, + "epoch": 0.13, + "grad_norm": 1.5923845319176075, + "learning_rate": 9.744117212230038e-06, + "loss": 0.6263, "step": 1821 }, { - "epoch": 0.19, - "grad_norm": 2.5556685920907243, - "learning_rate": 9.330269031783147e-06, - "loss": 0.6904, + "epoch": 0.13, + "grad_norm": 1.6464883088006244, + "learning_rate": 9.743754171935668e-06, + "loss": 0.6258, "step": 1822 }, { - "epoch": 0.19, - "grad_norm": 2.50380795052326, - "learning_rate": 9.329416745017573e-06, - "loss": 0.6958, + "epoch": 0.13, + "grad_norm": 1.6735617586884581, + "learning_rate": 9.743390881060128e-06, + "loss": 0.6307, "step": 1823 }, { - "epoch": 0.19, - "grad_norm": 2.195310629239766, - "learning_rate": 9.328563955272873e-06, - "loss": 0.718, + "epoch": 0.13, + "grad_norm": 2.073981150557679, + "learning_rate": 9.743027339622609e-06, + "loss": 0.6098, "step": 1824 }, { - "epoch": 0.19, - "grad_norm": 2.596171146233937, - "learning_rate": 9.327710662648128e-06, - "loss": 0.6523, + "epoch": 0.13, + "grad_norm": 1.8490703665318342, + "learning_rate": 9.742663547642314e-06, + "loss": 0.5772, "step": 1825 }, { - "epoch": 0.19, - "grad_norm": 2.1903089534866957, - "learning_rate": 9.326856867242467e-06, - "loss": 0.6914, + "epoch": 0.13, + "grad_norm": 1.6370870315855133, + "learning_rate": 9.74229950513846e-06, + "loss": 0.5596, "step": 1826 }, { - "epoch": 0.19, - "grad_norm": 2.1949315431307785, - "learning_rate": 9.326002569155084e-06, - "loss": 0.6121, + "epoch": 0.13, + "grad_norm": 1.7245658535014168, + "learning_rate": 9.741935212130277e-06, + "loss": 0.5353, "step": 1827 }, { - "epoch": 0.19, - "grad_norm": 2.1196053273242206, - "learning_rate": 9.325147768485226e-06, - "loss": 0.7247, + "epoch": 0.13, + "grad_norm": 1.76488788251901, + "learning_rate": 9.741570668637009e-06, + "loss": 0.531, "step": 1828 }, { - "epoch": 0.19, - "grad_norm": 2.927602122432581, - "learning_rate": 9.324292465332205e-06, - "loss": 0.7023, + "epoch": 0.13, + "grad_norm": 2.121555657488959, + "learning_rate": 9.741205874677912e-06, + "loss": 0.6274, "step": 1829 }, { - "epoch": 0.19, - "grad_norm": 2.59333950514103, - "learning_rate": 9.323436659795384e-06, - "loss": 0.774, + "epoch": 0.13, + "grad_norm": 1.6008938025447474, + "learning_rate": 9.740840830272253e-06, + "loss": 0.5528, "step": 1830 }, { - "epoch": 0.19, - "grad_norm": 2.1860597709229794, - "learning_rate": 9.32258035197419e-06, - "loss": 0.6294, + "epoch": 0.13, + "grad_norm": 1.1258905454263233, + "learning_rate": 9.74047553543932e-06, + "loss": 0.4828, "step": 1831 }, { - "epoch": 0.19, - "grad_norm": 2.237668840439711, - "learning_rate": 9.321723541968106e-06, - "loss": 0.6915, + "epoch": 0.13, + "grad_norm": 1.036980778288693, + "learning_rate": 9.740109990198405e-06, + "loss": 0.4817, "step": 1832 }, { - "epoch": 0.19, - "grad_norm": 3.013676832988803, - "learning_rate": 9.320866229876674e-06, - "loss": 0.7642, + "epoch": 0.13, + "grad_norm": 1.626082657956783, + "learning_rate": 9.73974419456882e-06, + "loss": 0.5458, "step": 1833 }, { - "epoch": 0.19, - "grad_norm": 2.357450534390374, - "learning_rate": 9.320008415799496e-06, - "loss": 0.7363, + "epoch": 0.13, + "grad_norm": 1.8812303512649025, + "learning_rate": 9.739378148569882e-06, + "loss": 0.6286, "step": 1834 }, { - "epoch": 0.19, - "grad_norm": 3.2990217042269863, - "learning_rate": 9.319150099836225e-06, - "loss": 0.6567, + "epoch": 0.13, + "grad_norm": 2.0536167328882935, + "learning_rate": 9.739011852220934e-06, + "loss": 0.5576, "step": 1835 }, { - "epoch": 0.19, - "grad_norm": 2.4112889638082033, - "learning_rate": 9.318291282086582e-06, - "loss": 0.6427, + "epoch": 0.13, + "grad_norm": 2.2579630032365956, + "learning_rate": 9.738645305541323e-06, + "loss": 0.5114, "step": 1836 }, { - "epoch": 0.19, - "grad_norm": 2.5245440646141706, - "learning_rate": 9.317431962650339e-06, - "loss": 0.6699, + "epoch": 0.13, + "grad_norm": 0.9930640274728167, + "learning_rate": 9.738278508550409e-06, + "loss": 0.4685, "step": 1837 }, { - "epoch": 0.19, - "grad_norm": 2.1773949952495566, - "learning_rate": 9.316572141627334e-06, - "loss": 0.6629, + "epoch": 0.13, + "grad_norm": 1.7599488549702391, + "learning_rate": 9.737911461267569e-06, + "loss": 0.5631, "step": 1838 }, { - "epoch": 0.19, - "grad_norm": 2.0104341618703967, - "learning_rate": 9.315711819117452e-06, - "loss": 0.6719, + "epoch": 0.13, + "grad_norm": 1.6566367386726666, + "learning_rate": 9.737544163712192e-06, + "loss": 0.5902, "step": 1839 }, { - "epoch": 0.19, - "grad_norm": 2.5108935311841467, - "learning_rate": 9.31485099522065e-06, - "loss": 0.7134, + "epoch": 0.13, + "grad_norm": 1.7383763836761734, + "learning_rate": 9.737176615903677e-06, + "loss": 0.5485, "step": 1840 }, { - "epoch": 0.19, - "grad_norm": 2.695150335447739, - "learning_rate": 9.31398967003693e-06, - "loss": 0.8007, + "epoch": 0.13, + "grad_norm": 3.8174449743113494, + "learning_rate": 9.736808817861442e-06, + "loss": 0.6577, "step": 1841 }, { - "epoch": 0.19, - "grad_norm": 2.7187367089479895, - "learning_rate": 9.31312784366636e-06, - "loss": 0.6938, + "epoch": 0.13, + "grad_norm": 1.583372161788993, + "learning_rate": 9.736440769604917e-06, + "loss": 0.6362, "step": 1842 }, { - "epoch": 0.19, - "grad_norm": 2.266932399300048, - "learning_rate": 9.312265516209068e-06, - "loss": 0.7279, + "epoch": 0.13, + "grad_norm": 1.47932775671271, + "learning_rate": 9.736072471153539e-06, + "loss": 0.5893, "step": 1843 }, { - "epoch": 0.19, - "grad_norm": 2.064288579022452, - "learning_rate": 9.311402687765231e-06, - "loss": 0.6903, + "epoch": 0.13, + "grad_norm": 1.8968165397017982, + "learning_rate": 9.735703922526767e-06, + "loss": 0.5705, "step": 1844 }, { - "epoch": 0.19, - "grad_norm": 2.3054354931895165, - "learning_rate": 9.310539358435095e-06, - "loss": 0.6779, + "epoch": 0.13, + "grad_norm": 1.9733098276832608, + "learning_rate": 9.735335123744065e-06, + "loss": 0.5465, "step": 1845 }, { - "epoch": 0.19, - "grad_norm": 2.3704699001159244, - "learning_rate": 9.309675528318955e-06, - "loss": 0.7828, + "epoch": 0.13, + "grad_norm": 1.7147654002984136, + "learning_rate": 9.734966074824918e-06, + "loss": 0.5625, "step": 1846 }, { - "epoch": 0.19, - "grad_norm": 2.5359614229510274, - "learning_rate": 9.308811197517172e-06, - "loss": 0.6946, + "epoch": 0.13, + "grad_norm": 1.7218864041970574, + "learning_rate": 9.734596775788819e-06, + "loss": 0.5844, "step": 1847 }, { - "epoch": 0.19, - "grad_norm": 2.433416978342713, - "learning_rate": 9.307946366130158e-06, - "loss": 0.8163, + "epoch": 0.13, + "grad_norm": 2.1545241455697384, + "learning_rate": 9.734227226655273e-06, + "loss": 0.5952, "step": 1848 }, { - "epoch": 0.19, - "grad_norm": 2.6463567241215493, - "learning_rate": 9.307081034258389e-06, - "loss": 0.7162, + "epoch": 0.13, + "grad_norm": 1.7034475724160862, + "learning_rate": 9.733857427443805e-06, + "loss": 0.6328, "step": 1849 }, { - "epoch": 0.19, - "grad_norm": 3.2116023992699523, - "learning_rate": 9.306215202002396e-06, - "loss": 0.6796, + "epoch": 0.13, + "grad_norm": 1.8168758250070562, + "learning_rate": 9.733487378173946e-06, + "loss": 0.547, "step": 1850 }, { - "epoch": 0.19, - "grad_norm": 3.0445330680873828, - "learning_rate": 9.305348869462768e-06, - "loss": 0.7149, + "epoch": 0.13, + "grad_norm": 1.8909581894037046, + "learning_rate": 9.733117078865245e-06, + "loss": 0.6265, "step": 1851 }, { - "epoch": 0.19, - "grad_norm": 1.881081284323706, - "learning_rate": 9.304482036740154e-06, - "loss": 0.6436, + "epoch": 0.13, + "grad_norm": 1.7053347248752695, + "learning_rate": 9.732746529537262e-06, + "loss": 0.6338, "step": 1852 }, { - "epoch": 0.19, - "grad_norm": 2.4175242650695687, - "learning_rate": 9.30361470393526e-06, - "loss": 0.6238, + "epoch": 0.13, + "grad_norm": 1.760585292006874, + "learning_rate": 9.73237573020957e-06, + "loss": 0.5503, "step": 1853 }, { - "epoch": 0.2, - "grad_norm": 2.430329856197137, - "learning_rate": 9.302746871148852e-06, - "loss": 0.7318, + "epoch": 0.13, + "grad_norm": 1.5703752416545302, + "learning_rate": 9.732004680901758e-06, + "loss": 0.5205, "step": 1854 }, { - "epoch": 0.2, - "grad_norm": 2.1962368489134803, - "learning_rate": 9.301878538481748e-06, - "loss": 0.7108, + "epoch": 0.13, + "grad_norm": 1.7650736345884976, + "learning_rate": 9.731633381633422e-06, + "loss": 0.583, "step": 1855 }, { - "epoch": 0.2, - "grad_norm": 2.169152259585836, - "learning_rate": 9.30100970603483e-06, - "loss": 0.7173, + "epoch": 0.13, + "grad_norm": 1.6297027030570372, + "learning_rate": 9.73126183242418e-06, + "loss": 0.5837, "step": 1856 }, { - "epoch": 0.2, - "grad_norm": 2.463790448876148, - "learning_rate": 9.30014037390904e-06, - "loss": 0.689, + "epoch": 0.13, + "grad_norm": 2.0250382008703625, + "learning_rate": 9.730890033293656e-06, + "loss": 0.5748, "step": 1857 }, { - "epoch": 0.2, - "grad_norm": 2.368852330455152, - "learning_rate": 9.299270542205372e-06, - "loss": 0.6933, + "epoch": 0.13, + "grad_norm": 2.2960826455462477, + "learning_rate": 9.73051798426149e-06, + "loss": 0.5834, "step": 1858 }, { - "epoch": 0.2, - "grad_norm": 2.444593231468713, - "learning_rate": 9.298400211024878e-06, - "loss": 0.6799, + "epoch": 0.13, + "grad_norm": 2.2600734848769326, + "learning_rate": 9.730145685347334e-06, + "loss": 0.5832, "step": 1859 }, { - "epoch": 0.2, - "grad_norm": 1.8680880820924584, - "learning_rate": 9.297529380468675e-06, - "loss": 0.6574, + "epoch": 0.13, + "grad_norm": 1.9689673688637401, + "learning_rate": 9.729773136570855e-06, + "loss": 0.6047, "step": 1860 }, { - "epoch": 0.2, - "grad_norm": 2.948905283236975, - "learning_rate": 9.29665805063793e-06, - "loss": 0.6756, + "epoch": 0.13, + "grad_norm": 1.5493514070529597, + "learning_rate": 9.729400337951733e-06, + "loss": 0.5293, "step": 1861 }, { - "epoch": 0.2, - "grad_norm": 2.1963367833574643, - "learning_rate": 9.295786221633874e-06, - "loss": 0.634, + "epoch": 0.13, + "grad_norm": 3.066894156481255, + "learning_rate": 9.729027289509661e-06, + "loss": 0.6419, "step": 1862 }, { - "epoch": 0.2, - "grad_norm": 2.139790541038441, - "learning_rate": 9.294913893557792e-06, - "loss": 0.6734, + "epoch": 0.13, + "grad_norm": 1.8237152700483332, + "learning_rate": 9.72865399126434e-06, + "loss": 0.6412, "step": 1863 }, { - "epoch": 0.2, - "grad_norm": 2.619706209228136, - "learning_rate": 9.294041066511031e-06, - "loss": 0.6999, + "epoch": 0.13, + "grad_norm": 1.7185727047590058, + "learning_rate": 9.728280443235495e-06, + "loss": 0.506, "step": 1864 }, { - "epoch": 0.2, - "grad_norm": 2.409480108291939, - "learning_rate": 9.29316774059499e-06, - "loss": 0.6769, + "epoch": 0.13, + "grad_norm": 2.1365485331375758, + "learning_rate": 9.727906645442855e-06, + "loss": 0.5738, "step": 1865 }, { - "epoch": 0.2, - "grad_norm": 2.410329086374503, - "learning_rate": 9.29229391591113e-06, - "loss": 0.7089, + "epoch": 0.13, + "grad_norm": 2.655894481515866, + "learning_rate": 9.727532597906165e-06, + "loss": 0.5059, "step": 1866 }, { - "epoch": 0.2, - "grad_norm": 5.690751097339351, - "learning_rate": 9.291419592560973e-06, - "loss": 0.7696, + "epoch": 0.13, + "grad_norm": 1.6134682943932184, + "learning_rate": 9.727158300645184e-06, + "loss": 0.5201, "step": 1867 }, { - "epoch": 0.2, - "grad_norm": 2.69073549819373, - "learning_rate": 9.290544770646092e-06, - "loss": 0.6698, + "epoch": 0.13, + "grad_norm": 1.7467219475266151, + "learning_rate": 9.726783753679684e-06, + "loss": 0.5976, "step": 1868 }, { - "epoch": 0.2, - "grad_norm": 3.9808783369984093, - "learning_rate": 9.289669450268122e-06, - "loss": 0.6039, + "epoch": 0.13, + "grad_norm": 2.031132939208805, + "learning_rate": 9.726408957029451e-06, + "loss": 0.5283, "step": 1869 }, { - "epoch": 0.2, - "grad_norm": 2.15890459589978, - "learning_rate": 9.288793631528757e-06, - "loss": 0.6616, + "epoch": 0.13, + "grad_norm": 2.01141606139681, + "learning_rate": 9.72603391071428e-06, + "loss": 0.5229, "step": 1870 }, { - "epoch": 0.2, - "grad_norm": 2.081034392032343, - "learning_rate": 9.287917314529743e-06, - "loss": 0.682, + "epoch": 0.13, + "grad_norm": 2.2978469798015286, + "learning_rate": 9.725658614753983e-06, + "loss": 0.6048, "step": 1871 }, { - "epoch": 0.2, - "grad_norm": 2.1849941363664382, - "learning_rate": 9.287040499372893e-06, - "loss": 0.7173, + "epoch": 0.13, + "grad_norm": 1.8490977385323506, + "learning_rate": 9.725283069168387e-06, + "loss": 0.6608, "step": 1872 }, { - "epoch": 0.2, - "grad_norm": 2.4741373441095202, - "learning_rate": 9.286163186160067e-06, - "loss": 0.6348, + "epoch": 0.13, + "grad_norm": 1.6557494887540234, + "learning_rate": 9.724907273977328e-06, + "loss": 0.606, "step": 1873 }, { - "epoch": 0.2, - "grad_norm": 2.4013819058393944, - "learning_rate": 9.285285374993195e-06, - "loss": 0.641, + "epoch": 0.13, + "grad_norm": 1.777194410135706, + "learning_rate": 9.724531229200656e-06, + "loss": 0.5844, "step": 1874 }, { - "epoch": 0.2, - "grad_norm": 2.3332689108954336, - "learning_rate": 9.284407065974254e-06, - "loss": 0.7153, + "epoch": 0.13, + "grad_norm": 1.7980145414491835, + "learning_rate": 9.724154934858236e-06, + "loss": 0.6222, "step": 1875 }, { - "epoch": 0.2, - "grad_norm": 2.3415860761989378, - "learning_rate": 9.283528259205287e-06, - "loss": 0.7111, + "epoch": 0.13, + "grad_norm": 1.6523558845435453, + "learning_rate": 9.723778390969944e-06, + "loss": 0.564, "step": 1876 }, { - "epoch": 0.2, - "grad_norm": 2.5710052029402166, - "learning_rate": 9.282648954788387e-06, - "loss": 0.6825, + "epoch": 0.13, + "grad_norm": 1.9483233555565544, + "learning_rate": 9.723401597555671e-06, + "loss": 0.5447, "step": 1877 }, { - "epoch": 0.2, - "grad_norm": 2.172242362761334, - "learning_rate": 9.281769152825713e-06, - "loss": 0.6001, + "epoch": 0.13, + "grad_norm": 1.814751818228945, + "learning_rate": 9.723024554635321e-06, + "loss": 0.5982, "step": 1878 }, { - "epoch": 0.2, - "grad_norm": 2.202637528829396, - "learning_rate": 9.280888853419476e-06, - "loss": 0.6635, + "epoch": 0.13, + "grad_norm": 2.483563582137913, + "learning_rate": 9.722647262228811e-06, + "loss": 0.5865, "step": 1879 }, { - "epoch": 0.2, - "grad_norm": 4.4006855761361345, - "learning_rate": 9.280008056671947e-06, - "loss": 0.694, + "epoch": 0.13, + "grad_norm": 1.714570618215152, + "learning_rate": 9.722269720356068e-06, + "loss": 0.5833, "step": 1880 }, { - "epoch": 0.2, - "grad_norm": 2.831327947150695, - "learning_rate": 9.279126762685454e-06, - "loss": 0.636, + "epoch": 0.13, + "grad_norm": 1.5028603977931627, + "learning_rate": 9.721891929037039e-06, + "loss": 0.5628, "step": 1881 }, { - "epoch": 0.2, - "grad_norm": 2.6062353877854436, - "learning_rate": 9.278244971562382e-06, - "loss": 0.8014, + "epoch": 0.13, + "grad_norm": 1.7998178818993837, + "learning_rate": 9.721513888291677e-06, + "loss": 0.545, "step": 1882 }, { - "epoch": 0.2, - "grad_norm": 2.870705908895546, - "learning_rate": 9.277362683405177e-06, - "loss": 0.6577, + "epoch": 0.13, + "grad_norm": 1.8400516711438888, + "learning_rate": 9.721135598139954e-06, + "loss": 0.549, "step": 1883 }, { - "epoch": 0.2, - "grad_norm": 2.3116536973353057, - "learning_rate": 9.276479898316341e-06, - "loss": 0.6627, + "epoch": 0.13, + "grad_norm": 1.8150691848641167, + "learning_rate": 9.72075705860185e-06, + "loss": 0.5932, "step": 1884 }, { - "epoch": 0.2, - "grad_norm": 2.325166740616653, - "learning_rate": 9.275596616398431e-06, - "loss": 0.7138, + "epoch": 0.13, + "grad_norm": 2.100055762651, + "learning_rate": 9.720378269697363e-06, + "loss": 0.5723, "step": 1885 }, { - "epoch": 0.2, - "grad_norm": 3.764579252240071, - "learning_rate": 9.274712837754068e-06, - "loss": 0.6182, + "epoch": 0.13, + "grad_norm": 1.6386119826372871, + "learning_rate": 9.7199992314465e-06, + "loss": 0.5679, "step": 1886 }, { - "epoch": 0.2, - "grad_norm": 2.610119518405415, - "learning_rate": 9.273828562485923e-06, - "loss": 0.6369, + "epoch": 0.13, + "grad_norm": 2.0120321420240663, + "learning_rate": 9.719619943869285e-06, + "loss": 0.5796, "step": 1887 }, { - "epoch": 0.2, - "grad_norm": 2.3604350623510846, - "learning_rate": 9.272943790696728e-06, - "loss": 0.6644, + "epoch": 0.13, + "grad_norm": 1.759082562944045, + "learning_rate": 9.719240406985753e-06, + "loss": 0.5484, "step": 1888 }, { - "epoch": 0.2, - "grad_norm": 2.2652286769748495, - "learning_rate": 9.272058522489277e-06, - "loss": 0.7804, + "epoch": 0.13, + "grad_norm": 1.6118194118277673, + "learning_rate": 9.718860620815951e-06, + "loss": 0.5131, "step": 1889 }, { - "epoch": 0.2, - "grad_norm": 2.9511210502310745, - "learning_rate": 9.271172757966418e-06, - "loss": 0.6344, + "epoch": 0.13, + "grad_norm": 1.6960940787369176, + "learning_rate": 9.71848058537994e-06, + "loss": 0.5331, "step": 1890 }, { - "epoch": 0.2, - "grad_norm": 2.8169929100268867, - "learning_rate": 9.270286497231052e-06, - "loss": 0.697, + "epoch": 0.13, + "grad_norm": 2.015589647027879, + "learning_rate": 9.718100300697797e-06, + "loss": 0.588, "step": 1891 }, { - "epoch": 0.2, - "grad_norm": 3.7252767248143597, - "learning_rate": 9.269399740386146e-06, - "loss": 0.7239, + "epoch": 0.13, + "grad_norm": 2.020843184540193, + "learning_rate": 9.71771976678961e-06, + "loss": 0.5507, "step": 1892 }, { - "epoch": 0.2, - "grad_norm": 2.507005417930868, - "learning_rate": 9.26851248753472e-06, - "loss": 0.6427, + "epoch": 0.13, + "grad_norm": 1.8829302733971391, + "learning_rate": 9.717338983675479e-06, + "loss": 0.5602, "step": 1893 }, { - "epoch": 0.2, - "grad_norm": 2.8482802966189373, - "learning_rate": 9.267624738779853e-06, - "loss": 0.6975, + "epoch": 0.13, + "grad_norm": 1.8453451573266069, + "learning_rate": 9.716957951375517e-06, + "loss": 0.5794, "step": 1894 }, { - "epoch": 0.2, - "grad_norm": 2.8213003925408153, - "learning_rate": 9.266736494224677e-06, - "loss": 0.6433, + "epoch": 0.13, + "grad_norm": 1.6492758692754435, + "learning_rate": 9.716576669909854e-06, + "loss": 0.5723, "step": 1895 }, { - "epoch": 0.2, - "grad_norm": 2.436716166307937, - "learning_rate": 9.265847753972392e-06, - "loss": 0.6206, + "epoch": 0.13, + "grad_norm": 2.1699982274176697, + "learning_rate": 9.716195139298629e-06, + "loss": 0.5558, "step": 1896 }, { - "epoch": 0.2, - "grad_norm": 3.0067406772274063, - "learning_rate": 9.264958518126246e-06, - "loss": 0.7178, + "epoch": 0.13, + "grad_norm": 1.7691051978483867, + "learning_rate": 9.715813359561995e-06, + "loss": 0.5532, "step": 1897 }, { - "epoch": 0.2, - "grad_norm": 2.652398231372856, - "learning_rate": 9.264068786789546e-06, - "loss": 0.7469, + "epoch": 0.13, + "grad_norm": 3.399678167547627, + "learning_rate": 9.715431330720121e-06, + "loss": 0.6217, "step": 1898 }, { - "epoch": 0.2, - "grad_norm": 2.903751071631814, - "learning_rate": 9.263178560065664e-06, - "loss": 0.6284, + "epoch": 0.13, + "grad_norm": 1.8102488774941672, + "learning_rate": 9.715049052793187e-06, + "loss": 0.5988, "step": 1899 }, { - "epoch": 0.2, - "grad_norm": 2.233324204587242, - "learning_rate": 9.262287838058017e-06, - "loss": 0.654, + "epoch": 0.13, + "grad_norm": 2.099223442300193, + "learning_rate": 9.714666525801385e-06, + "loss": 0.5887, "step": 1900 }, { - "epoch": 0.2, - "grad_norm": 5.426621816553852, - "learning_rate": 9.261396620870092e-06, - "loss": 0.6874, + "epoch": 0.13, + "grad_norm": 1.6393671314051599, + "learning_rate": 9.71428374976492e-06, + "loss": 0.5324, "step": 1901 }, { - "epoch": 0.2, - "grad_norm": 2.9434733409299283, - "learning_rate": 9.260504908605425e-06, - "loss": 0.6296, + "epoch": 0.13, + "grad_norm": 1.0684902221418966, + "learning_rate": 9.713900724704012e-06, + "loss": 0.4965, "step": 1902 }, { - "epoch": 0.2, - "grad_norm": 1.370239117918409, - "learning_rate": 9.259612701367615e-06, - "loss": 0.6566, + "epoch": 0.14, + "grad_norm": 1.611745862922129, + "learning_rate": 9.713517450638898e-06, + "loss": 0.5219, "step": 1903 }, { - "epoch": 0.2, - "grad_norm": 2.306513335576572, - "learning_rate": 9.258719999260315e-06, - "loss": 0.6967, + "epoch": 0.14, + "grad_norm": 1.9839823738927387, + "learning_rate": 9.71313392758982e-06, + "loss": 0.5911, "step": 1904 }, { - "epoch": 0.2, - "grad_norm": 2.385612710261463, - "learning_rate": 9.257826802387234e-06, - "loss": 0.6995, + "epoch": 0.14, + "grad_norm": 2.0530300227522003, + "learning_rate": 9.712750155577038e-06, + "loss": 0.5495, "step": 1905 }, { - "epoch": 0.2, - "grad_norm": 2.8909613530557707, - "learning_rate": 9.256933110852145e-06, - "loss": 0.6234, + "epoch": 0.14, + "grad_norm": 5.351509868113161, + "learning_rate": 9.712366134620822e-06, + "loss": 0.6077, "step": 1906 }, { - "epoch": 0.2, - "grad_norm": 2.956138783299861, - "learning_rate": 9.25603892475887e-06, - "loss": 0.7436, + "epoch": 0.14, + "grad_norm": 2.1900013432899224, + "learning_rate": 9.71198186474146e-06, + "loss": 0.5641, "step": 1907 }, { - "epoch": 0.2, - "grad_norm": 3.9130492645144934, - "learning_rate": 9.255144244211299e-06, - "loss": 0.7194, + "epoch": 0.14, + "grad_norm": 3.342608726423243, + "learning_rate": 9.711597345959249e-06, + "loss": 0.5781, "step": 1908 }, { - "epoch": 0.2, - "grad_norm": 2.8433841114866922, - "learning_rate": 9.254249069313368e-06, - "loss": 0.6685, + "epoch": 0.14, + "grad_norm": 2.598274876517335, + "learning_rate": 9.711212578294501e-06, + "loss": 0.5918, "step": 1909 }, { - "epoch": 0.2, - "grad_norm": 3.0403922052487427, - "learning_rate": 9.253353400169078e-06, - "loss": 0.6988, + "epoch": 0.14, + "grad_norm": 1.6222691202148796, + "learning_rate": 9.710827561767544e-06, + "loss": 0.5537, "step": 1910 }, { - "epoch": 0.2, - "grad_norm": 2.3083987036017835, - "learning_rate": 9.252457236882487e-06, - "loss": 0.659, + "epoch": 0.14, + "grad_norm": 1.8015162911865747, + "learning_rate": 9.71044229639871e-06, + "loss": 0.5615, "step": 1911 }, { - "epoch": 0.2, - "grad_norm": 2.636186973720048, - "learning_rate": 9.251560579557705e-06, - "loss": 0.6196, + "epoch": 0.14, + "grad_norm": 0.8631502143605907, + "learning_rate": 9.710056782208352e-06, + "loss": 0.466, "step": 1912 }, { - "epoch": 0.2, - "grad_norm": 2.3354347387696337, - "learning_rate": 9.250663428298906e-06, - "loss": 0.7811, + "epoch": 0.14, + "grad_norm": 11.24048346996647, + "learning_rate": 9.709671019216838e-06, + "loss": 0.5897, "step": 1913 }, { - "epoch": 0.2, - "grad_norm": 3.3940926366504227, - "learning_rate": 9.249765783210316e-06, - "loss": 0.6968, + "epoch": 0.14, + "grad_norm": 1.9351564928950402, + "learning_rate": 9.70928500744454e-06, + "loss": 0.6004, "step": 1914 }, { - "epoch": 0.2, - "grad_norm": 1.9781144300478661, - "learning_rate": 9.248867644396224e-06, - "loss": 0.7138, + "epoch": 0.14, + "grad_norm": 2.1877130009984307, + "learning_rate": 9.70889874691185e-06, + "loss": 0.5135, "step": 1915 }, { - "epoch": 0.2, - "grad_norm": 6.221082262034515, - "learning_rate": 9.24796901196097e-06, - "loss": 0.7758, + "epoch": 0.14, + "grad_norm": 1.5355295884907905, + "learning_rate": 9.708512237639174e-06, + "loss": 0.5874, "step": 1916 }, { - "epoch": 0.2, - "grad_norm": 2.281625973109882, - "learning_rate": 9.247069886008957e-06, - "loss": 0.6453, + "epoch": 0.14, + "grad_norm": 2.0231000565028885, + "learning_rate": 9.708125479646926e-06, + "loss": 0.5659, "step": 1917 }, { - "epoch": 0.2, - "grad_norm": 2.0495291535897127, - "learning_rate": 9.24617026664464e-06, - "loss": 0.6724, + "epoch": 0.14, + "grad_norm": 2.032614284070895, + "learning_rate": 9.707738472955536e-06, + "loss": 0.5441, "step": 1918 }, { - "epoch": 0.2, - "grad_norm": 2.294555365241956, - "learning_rate": 9.245270153972537e-06, - "loss": 0.6348, + "epoch": 0.14, + "grad_norm": 2.2745889511277335, + "learning_rate": 9.70735121758545e-06, + "loss": 0.6034, "step": 1919 }, { - "epoch": 0.2, - "grad_norm": 2.6583206062968143, - "learning_rate": 9.244369548097218e-06, - "loss": 0.7125, + "epoch": 0.14, + "grad_norm": 1.7299876620398278, + "learning_rate": 9.70696371355712e-06, + "loss": 0.6339, "step": 1920 }, { - "epoch": 0.2, - "grad_norm": 1.2992496345079894, - "learning_rate": 9.243468449123316e-06, - "loss": 0.6501, + "epoch": 0.14, + "grad_norm": 1.8988464421356506, + "learning_rate": 9.706575960891019e-06, + "loss": 0.5601, "step": 1921 }, { - "epoch": 0.2, - "grad_norm": 2.294260337838386, - "learning_rate": 9.242566857155515e-06, - "loss": 0.6783, + "epoch": 0.14, + "grad_norm": 1.8520466579912147, + "learning_rate": 9.706187959607627e-06, + "loss": 0.5738, "step": 1922 }, { - "epoch": 0.2, - "grad_norm": 2.2677177768740107, - "learning_rate": 9.241664772298561e-06, - "loss": 0.7314, + "epoch": 0.14, + "grad_norm": 1.7990614959943028, + "learning_rate": 9.70579970972744e-06, + "loss": 0.5709, "step": 1923 }, { - "epoch": 0.2, - "grad_norm": 2.5676248737182172, - "learning_rate": 9.240762194657254e-06, - "loss": 0.7354, + "epoch": 0.14, + "grad_norm": 0.9774675755811699, + "learning_rate": 9.705411211270966e-06, + "loss": 0.4507, "step": 1924 }, { - "epoch": 0.2, - "grad_norm": 2.456584308459011, - "learning_rate": 9.239859124336457e-06, - "loss": 0.7148, + "epoch": 0.14, + "grad_norm": 2.146400391484652, + "learning_rate": 9.70502246425873e-06, + "loss": 0.6161, "step": 1925 }, { - "epoch": 0.2, - "grad_norm": 2.5953088609259134, - "learning_rate": 9.23895556144108e-06, - "loss": 0.7198, + "epoch": 0.14, + "grad_norm": 0.8970430579912533, + "learning_rate": 9.704633468711262e-06, + "loss": 0.4492, "step": 1926 }, { - "epoch": 0.2, - "grad_norm": 2.144466625240076, - "learning_rate": 9.2380515060761e-06, - "loss": 0.6582, + "epoch": 0.14, + "grad_norm": 1.9374378448836693, + "learning_rate": 9.704244224649116e-06, + "loss": 0.5926, "step": 1927 }, { - "epoch": 0.2, - "grad_norm": 2.821280176023632, - "learning_rate": 9.237146958346549e-06, - "loss": 0.6836, + "epoch": 0.14, + "grad_norm": 4.649329743622021, + "learning_rate": 9.703854732092846e-06, + "loss": 0.5931, "step": 1928 }, { - "epoch": 0.2, - "grad_norm": 2.012627315683036, - "learning_rate": 9.236241918357511e-06, - "loss": 0.7583, + "epoch": 0.14, + "grad_norm": 1.7104544480288595, + "learning_rate": 9.703464991063032e-06, + "loss": 0.5394, "step": 1929 }, { - "epoch": 0.2, - "grad_norm": 2.3886139997345825, - "learning_rate": 9.235336386214133e-06, - "loss": 0.6987, + "epoch": 0.14, + "grad_norm": 2.0388432311162843, + "learning_rate": 9.70307500158026e-06, + "loss": 0.5898, "step": 1930 }, { - "epoch": 0.2, - "grad_norm": 3.5518418067148287, - "learning_rate": 9.234430362021615e-06, - "loss": 0.7171, + "epoch": 0.14, + "grad_norm": 1.6285930606764576, + "learning_rate": 9.702684763665131e-06, + "loss": 0.6785, "step": 1931 }, { - "epoch": 0.2, - "grad_norm": 2.0640715116734265, - "learning_rate": 9.233523845885221e-06, - "loss": 0.5934, + "epoch": 0.14, + "grad_norm": 1.664734745840734, + "learning_rate": 9.702294277338257e-06, + "loss": 0.4916, "step": 1932 }, { - "epoch": 0.2, - "grad_norm": 2.5462408850965303, - "learning_rate": 9.232616837910263e-06, - "loss": 0.6674, + "epoch": 0.14, + "grad_norm": 1.8824961457439187, + "learning_rate": 9.701903542620266e-06, + "loss": 0.6205, "step": 1933 }, { - "epoch": 0.2, - "grad_norm": 2.1027514803586316, - "learning_rate": 9.231709338202117e-06, - "loss": 0.7143, + "epoch": 0.14, + "grad_norm": 1.4826762065985195, + "learning_rate": 9.701512559531796e-06, + "loss": 0.5693, "step": 1934 }, { - "epoch": 0.2, - "grad_norm": 1.981837777190229, - "learning_rate": 9.230801346866212e-06, - "loss": 0.6824, + "epoch": 0.14, + "grad_norm": 1.9448054327895665, + "learning_rate": 9.701121328093503e-06, + "loss": 0.6109, "step": 1935 }, { - "epoch": 0.2, - "grad_norm": 3.0072257637585995, - "learning_rate": 9.229892864008037e-06, - "loss": 0.6737, + "epoch": 0.14, + "grad_norm": 1.8684682885153396, + "learning_rate": 9.700729848326053e-06, + "loss": 0.6196, "step": 1936 }, { - "epoch": 0.2, - "grad_norm": 2.404301234252323, - "learning_rate": 9.228983889733135e-06, - "loss": 0.72, + "epoch": 0.14, + "grad_norm": 1.7224208480919332, + "learning_rate": 9.700338120250123e-06, + "loss": 0.6297, "step": 1937 }, { - "epoch": 0.2, - "grad_norm": 2.1099503574763405, - "learning_rate": 9.228074424147111e-06, - "loss": 0.7708, + "epoch": 0.14, + "grad_norm": 0.9038897641747816, + "learning_rate": 9.699946143886406e-06, + "loss": 0.4946, "step": 1938 }, { - "epoch": 0.2, - "grad_norm": 2.1564706992929987, - "learning_rate": 9.227164467355621e-06, - "loss": 0.7047, + "epoch": 0.14, + "grad_norm": 1.7481379030291944, + "learning_rate": 9.699553919255609e-06, + "loss": 0.6254, "step": 1939 }, { - "epoch": 0.2, - "grad_norm": 2.717145887514134, - "learning_rate": 9.226254019464384e-06, - "loss": 0.7386, + "epoch": 0.14, + "grad_norm": 2.271531649959383, + "learning_rate": 9.699161446378449e-06, + "loss": 0.4865, "step": 1940 }, { - "epoch": 0.2, - "grad_norm": 3.434512344610932, - "learning_rate": 9.225343080579171e-06, - "loss": 0.7586, + "epoch": 0.14, + "grad_norm": 2.03328685203979, + "learning_rate": 9.69876872527566e-06, + "loss": 0.587, "step": 1941 }, { - "epoch": 0.2, - "grad_norm": 2.516975033266961, - "learning_rate": 9.224431650805814e-06, - "loss": 0.721, + "epoch": 0.14, + "grad_norm": 2.1237433624760302, + "learning_rate": 9.698375755967983e-06, + "loss": 0.6056, "step": 1942 }, { - "epoch": 0.2, - "grad_norm": 2.0749482225336697, - "learning_rate": 9.223519730250198e-06, - "loss": 0.6007, + "epoch": 0.14, + "grad_norm": 0.9295434955173402, + "learning_rate": 9.69798253847618e-06, + "loss": 0.4576, "step": 1943 }, { - "epoch": 0.2, - "grad_norm": 2.5096562394219775, - "learning_rate": 9.222607319018271e-06, - "loss": 0.7729, + "epoch": 0.14, + "grad_norm": 1.71672010487686, + "learning_rate": 9.697589072821022e-06, + "loss": 0.5861, "step": 1944 }, { - "epoch": 0.2, - "grad_norm": 1.9924376463945759, - "learning_rate": 9.221694417216031e-06, - "loss": 0.7065, + "epoch": 0.14, + "grad_norm": 1.5909691941652666, + "learning_rate": 9.697195359023287e-06, + "loss": 0.5475, "step": 1945 }, { - "epoch": 0.2, - "grad_norm": 3.4082767933565594, - "learning_rate": 9.220781024949536e-06, - "loss": 0.7151, + "epoch": 0.14, + "grad_norm": 1.796772956489963, + "learning_rate": 9.696801397103782e-06, + "loss": 0.5856, "step": 1946 }, { - "epoch": 0.2, - "grad_norm": 2.157525030220561, - "learning_rate": 9.219867142324904e-06, - "loss": 0.6323, + "epoch": 0.14, + "grad_norm": 2.050801229647497, + "learning_rate": 9.696407187083308e-06, + "loss": 0.6173, "step": 1947 }, { - "epoch": 0.2, - "grad_norm": 2.809345903838342, - "learning_rate": 9.218952769448307e-06, - "loss": 0.6643, + "epoch": 0.14, + "grad_norm": 2.602084215925719, + "learning_rate": 9.696012728982695e-06, + "loss": 0.5869, "step": 1948 }, { - "epoch": 0.21, - "grad_norm": 2.1126315310721426, - "learning_rate": 9.218037906425971e-06, - "loss": 0.66, + "epoch": 0.14, + "grad_norm": 1.9772914360569471, + "learning_rate": 9.695618022822778e-06, + "loss": 0.5087, "step": 1949 }, { - "epoch": 0.21, - "grad_norm": 2.106123096050007, - "learning_rate": 9.217122553364184e-06, - "loss": 0.779, + "epoch": 0.14, + "grad_norm": 2.1279420134004976, + "learning_rate": 9.695223068624403e-06, + "loss": 0.5749, "step": 1950 }, { - "epoch": 0.21, - "grad_norm": 2.4264702932506927, - "learning_rate": 9.21620671036929e-06, - "loss": 0.7279, + "epoch": 0.14, + "grad_norm": 1.8610654370320527, + "learning_rate": 9.694827866408438e-06, + "loss": 0.5847, "step": 1951 }, { - "epoch": 0.21, - "grad_norm": 8.387568989843238, - "learning_rate": 9.215290377547688e-06, - "loss": 0.576, + "epoch": 0.14, + "grad_norm": 2.396920740028801, + "learning_rate": 9.694432416195757e-06, + "loss": 0.5395, "step": 1952 }, { - "epoch": 0.21, - "grad_norm": 2.721569909570348, - "learning_rate": 9.214373555005834e-06, - "loss": 0.6169, + "epoch": 0.14, + "grad_norm": 1.582001326784994, + "learning_rate": 9.694036718007248e-06, + "loss": 0.5325, "step": 1953 }, { - "epoch": 0.21, - "grad_norm": 2.5751826747005553, - "learning_rate": 9.213456242850245e-06, - "loss": 0.7445, + "epoch": 0.14, + "grad_norm": 1.8012946702969606, + "learning_rate": 9.693640771863814e-06, + "loss": 0.6553, "step": 1954 }, { - "epoch": 0.21, - "grad_norm": 5.238179252301112, - "learning_rate": 9.21253844118749e-06, - "loss": 0.6893, + "epoch": 0.14, + "grad_norm": 2.05723679921335, + "learning_rate": 9.693244577786372e-06, + "loss": 0.5617, "step": 1955 }, { - "epoch": 0.21, - "grad_norm": 2.230727560495232, - "learning_rate": 9.211620150124192e-06, - "loss": 0.7169, + "epoch": 0.14, + "grad_norm": 1.941173465825226, + "learning_rate": 9.692848135795847e-06, + "loss": 0.614, "step": 1956 }, { - "epoch": 0.21, - "grad_norm": 2.5021330652535645, - "learning_rate": 9.210701369767043e-06, - "loss": 0.7869, + "epoch": 0.14, + "grad_norm": 1.6054274052580888, + "learning_rate": 9.692451445913182e-06, + "loss": 0.5614, "step": 1957 }, { - "epoch": 0.21, - "grad_norm": 2.7468363192392427, - "learning_rate": 9.20978210022278e-06, - "loss": 0.7359, + "epoch": 0.14, + "grad_norm": 1.8996110786215692, + "learning_rate": 9.692054508159332e-06, + "loss": 0.5908, "step": 1958 }, { - "epoch": 0.21, - "grad_norm": 2.0826919441428458, - "learning_rate": 9.208862341598201e-06, - "loss": 0.7324, + "epoch": 0.14, + "grad_norm": 1.827018735892037, + "learning_rate": 9.691657322555264e-06, + "loss": 0.5196, "step": 1959 }, { - "epoch": 0.21, - "grad_norm": 2.7555569094425993, - "learning_rate": 9.207942094000163e-06, - "loss": 0.7457, + "epoch": 0.14, + "grad_norm": 2.1052370586600406, + "learning_rate": 9.691259889121958e-06, + "loss": 0.5755, "step": 1960 }, { - "epoch": 0.21, - "grad_norm": 3.1278899651708088, - "learning_rate": 9.207021357535576e-06, - "loss": 0.7093, + "epoch": 0.14, + "grad_norm": 1.572889371364559, + "learning_rate": 9.69086220788041e-06, + "loss": 0.5068, "step": 1961 }, { - "epoch": 0.21, - "grad_norm": 2.304923508024911, - "learning_rate": 9.206100132311408e-06, - "loss": 0.6779, + "epoch": 0.14, + "grad_norm": 1.6116238587579266, + "learning_rate": 9.690464278851623e-06, + "loss": 0.5952, "step": 1962 }, { - "epoch": 0.21, - "grad_norm": 2.3962706909638007, - "learning_rate": 9.205178418434687e-06, - "loss": 0.7315, + "epoch": 0.14, + "grad_norm": 1.6474466374896115, + "learning_rate": 9.69006610205662e-06, + "loss": 0.4883, "step": 1963 }, { - "epoch": 0.21, - "grad_norm": 2.3044315673666205, - "learning_rate": 9.204256216012493e-06, - "loss": 0.7382, + "epoch": 0.14, + "grad_norm": 1.760255423271644, + "learning_rate": 9.689667677516434e-06, + "loss": 0.631, "step": 1964 }, { - "epoch": 0.21, - "grad_norm": 2.4732648879760863, - "learning_rate": 9.203333525151964e-06, - "loss": 0.7135, + "epoch": 0.14, + "grad_norm": 0.9136329100588662, + "learning_rate": 9.689269005252112e-06, + "loss": 0.4837, "step": 1965 }, { - "epoch": 0.21, - "grad_norm": 2.206789161392687, - "learning_rate": 9.202410345960298e-06, - "loss": 0.7343, + "epoch": 0.14, + "grad_norm": 0.9277274326514918, + "learning_rate": 9.68887008528471e-06, + "loss": 0.4508, "step": 1966 }, { - "epoch": 0.21, - "grad_norm": 2.4651985133702827, - "learning_rate": 9.201486678544745e-06, - "loss": 0.718, + "epoch": 0.14, + "grad_norm": 1.6343790951066772, + "learning_rate": 9.688470917635302e-06, + "loss": 0.6, "step": 1967 }, { - "epoch": 0.21, - "grad_norm": 2.2568110947549407, - "learning_rate": 9.200562523012615e-06, - "loss": 0.6678, + "epoch": 0.14, + "grad_norm": 1.9277144648580316, + "learning_rate": 9.688071502324973e-06, + "loss": 0.6062, "step": 1968 }, { - "epoch": 0.21, - "grad_norm": 2.4466689065325182, - "learning_rate": 9.199637879471272e-06, - "loss": 0.7278, - "step": 1969 + "epoch": 0.14, + "grad_norm": 1.5874142834021694, + "learning_rate": 9.687671839374822e-06, + "loss": 0.6886, + "step": 1969 }, { - "epoch": 0.21, - "grad_norm": 2.7412446183420593, - "learning_rate": 9.198712748028142e-06, - "loss": 0.6746, + "epoch": 0.14, + "grad_norm": 1.870420020364407, + "learning_rate": 9.68727192880596e-06, + "loss": 0.6541, "step": 1970 }, { - "epoch": 0.21, - "grad_norm": 2.090127601569364, - "learning_rate": 9.197787128790702e-06, - "loss": 0.7454, + "epoch": 0.14, + "grad_norm": 2.513852617536507, + "learning_rate": 9.686871770639514e-06, + "loss": 0.5583, "step": 1971 }, { - "epoch": 0.21, - "grad_norm": 2.2493962206295457, - "learning_rate": 9.19686102186649e-06, - "loss": 0.6949, + "epoch": 0.14, + "grad_norm": 2.085738095223795, + "learning_rate": 9.686471364896618e-06, + "loss": 0.535, "step": 1972 }, { - "epoch": 0.21, - "grad_norm": 2.4780855364247603, - "learning_rate": 9.195934427363093e-06, - "loss": 0.7695, + "epoch": 0.14, + "grad_norm": 1.7433581873596748, + "learning_rate": 9.686070711598428e-06, + "loss": 0.516, "step": 1973 }, { - "epoch": 0.21, - "grad_norm": 2.720134076492023, - "learning_rate": 9.195007345388165e-06, - "loss": 0.7939, + "epoch": 0.14, + "grad_norm": 2.300899630958353, + "learning_rate": 9.685669810766101e-06, + "loss": 0.6691, "step": 1974 }, { - "epoch": 0.21, - "grad_norm": 2.6619081847464705, - "learning_rate": 9.19407977604941e-06, - "loss": 0.7645, + "epoch": 0.14, + "grad_norm": 2.068259906561549, + "learning_rate": 9.68526866242082e-06, + "loss": 0.5745, "step": 1975 }, { - "epoch": 0.21, - "grad_norm": 2.1669764589008986, - "learning_rate": 9.193151719454591e-06, - "loss": 0.658, + "epoch": 0.14, + "grad_norm": 1.9647331242694106, + "learning_rate": 9.684867266583768e-06, + "loss": 0.5493, "step": 1976 }, { - "epoch": 0.21, - "grad_norm": 2.7606276779843375, - "learning_rate": 9.192223175711526e-06, - "loss": 0.641, + "epoch": 0.14, + "grad_norm": 2.6839497282681584, + "learning_rate": 9.684465623276156e-06, + "loss": 0.5272, "step": 1977 }, { - "epoch": 0.21, - "grad_norm": 2.5503781978393776, - "learning_rate": 9.191294144928091e-06, - "loss": 0.7459, + "epoch": 0.14, + "grad_norm": 2.187898736097372, + "learning_rate": 9.684063732519195e-06, + "loss": 0.5279, "step": 1978 }, { - "epoch": 0.21, - "grad_norm": 2.2113194458441345, - "learning_rate": 9.190364627212216e-06, - "loss": 0.6788, + "epoch": 0.14, + "grad_norm": 1.66899063538525, + "learning_rate": 9.683661594334117e-06, + "loss": 0.6323, "step": 1979 }, { - "epoch": 0.21, - "grad_norm": 3.178914025765916, - "learning_rate": 9.189434622671894e-06, - "loss": 0.6719, + "epoch": 0.14, + "grad_norm": 2.2623434928564436, + "learning_rate": 9.683259208742165e-06, + "loss": 0.4858, "step": 1980 }, { - "epoch": 0.21, - "grad_norm": 2.684741530038339, - "learning_rate": 9.188504131415167e-06, - "loss": 0.6779, + "epoch": 0.14, + "grad_norm": 1.3725364384574403, + "learning_rate": 9.68285657576459e-06, + "loss": 0.5917, "step": 1981 }, { - "epoch": 0.21, - "grad_norm": 2.245253383160478, - "learning_rate": 9.187573153550139e-06, - "loss": 0.6422, + "epoch": 0.14, + "grad_norm": 1.7046568008476424, + "learning_rate": 9.682453695422663e-06, + "loss": 0.575, "step": 1982 }, { - "epoch": 0.21, - "grad_norm": 2.5644319947882055, - "learning_rate": 9.186641689184966e-06, - "loss": 0.7558, + "epoch": 0.14, + "grad_norm": 1.8096622026911067, + "learning_rate": 9.682050567737665e-06, + "loss": 0.5119, "step": 1983 }, { - "epoch": 0.21, - "grad_norm": 2.6626949713612134, - "learning_rate": 9.185709738427864e-06, - "loss": 0.731, + "epoch": 0.14, + "grad_norm": 1.9693565074965238, + "learning_rate": 9.681647192730893e-06, + "loss": 0.6046, "step": 1984 }, { - "epoch": 0.21, - "grad_norm": 5.227738894627522, - "learning_rate": 9.184777301387104e-06, - "loss": 0.7663, + "epoch": 0.14, + "grad_norm": 2.2496369993399457, + "learning_rate": 9.681243570423651e-06, + "loss": 0.5687, "step": 1985 }, { - "epoch": 0.21, - "grad_norm": 2.3446022774136024, - "learning_rate": 9.183844378171016e-06, - "loss": 0.6564, + "epoch": 0.14, + "grad_norm": 1.8949705524076383, + "learning_rate": 9.680839700837262e-06, + "loss": 0.5801, "step": 1986 }, { - "epoch": 0.21, - "grad_norm": 2.432195543728687, - "learning_rate": 9.182910968887982e-06, - "loss": 0.7728, + "epoch": 0.14, + "grad_norm": 1.4363768872579783, + "learning_rate": 9.68043558399306e-06, + "loss": 0.6023, "step": 1987 }, { - "epoch": 0.21, - "grad_norm": 2.4823577607004146, - "learning_rate": 9.181977073646442e-06, - "loss": 0.6328, + "epoch": 0.14, + "grad_norm": 1.7356690230493972, + "learning_rate": 9.68003121991239e-06, + "loss": 0.5375, "step": 1988 }, { - "epoch": 0.21, - "grad_norm": 2.177220583837391, - "learning_rate": 9.181042692554894e-06, - "loss": 0.6911, + "epoch": 0.14, + "grad_norm": 1.6621643358883686, + "learning_rate": 9.679626608616612e-06, + "loss": 0.5774, "step": 1989 }, { - "epoch": 0.21, - "grad_norm": 2.483575743653078, - "learning_rate": 9.180107825721891e-06, - "loss": 0.7007, + "epoch": 0.14, + "grad_norm": 1.852317528650638, + "learning_rate": 9.6792217501271e-06, + "loss": 0.5591, "step": 1990 }, { - "epoch": 0.21, - "grad_norm": 2.109985363397772, - "learning_rate": 9.179172473256046e-06, - "loss": 0.688, + "epoch": 0.14, + "grad_norm": 1.6414986513098953, + "learning_rate": 9.678816644465242e-06, + "loss": 0.591, "step": 1991 }, { - "epoch": 0.21, - "grad_norm": 3.0644562546243423, - "learning_rate": 9.178236635266025e-06, - "loss": 0.6994, + "epoch": 0.14, + "grad_norm": 1.7635742077856367, + "learning_rate": 9.678411291652431e-06, + "loss": 0.5377, "step": 1992 }, { - "epoch": 0.21, - "grad_norm": 2.1056210922690526, - "learning_rate": 9.17730031186055e-06, - "loss": 0.7325, + "epoch": 0.14, + "grad_norm": 1.6372879328334657, + "learning_rate": 9.678005691710086e-06, + "loss": 0.4852, "step": 1993 }, { - "epoch": 0.21, - "grad_norm": 2.249585098272686, - "learning_rate": 9.176363503148397e-06, - "loss": 0.698, + "epoch": 0.14, + "grad_norm": 2.6022515437347913, + "learning_rate": 9.677599844659628e-06, + "loss": 0.668, "step": 1994 }, { - "epoch": 0.21, - "grad_norm": 2.57223165623908, - "learning_rate": 9.175426209238407e-06, - "loss": 0.6476, + "epoch": 0.14, + "grad_norm": 1.4613814713064646, + "learning_rate": 9.677193750522498e-06, + "loss": 0.5798, "step": 1995 }, { - "epoch": 0.21, - "grad_norm": 3.6183458052615665, - "learning_rate": 9.17448843023947e-06, - "loss": 0.8366, + "epoch": 0.14, + "grad_norm": 1.4948216965298493, + "learning_rate": 9.676787409320144e-06, + "loss": 0.5133, "step": 1996 }, { - "epoch": 0.21, - "grad_norm": 2.072274750411721, - "learning_rate": 9.173550166260533e-06, - "loss": 0.7383, + "epoch": 0.14, + "grad_norm": 1.65009310097062, + "learning_rate": 9.676380821074033e-06, + "loss": 0.5519, "step": 1997 }, { - "epoch": 0.21, - "grad_norm": 2.3127023094424963, - "learning_rate": 9.172611417410604e-06, - "loss": 0.6212, + "epoch": 0.14, + "grad_norm": 1.8144986595841188, + "learning_rate": 9.67597398580564e-06, + "loss": 0.5785, "step": 1998 }, { - "epoch": 0.21, - "grad_norm": 2.3387267206899556, - "learning_rate": 9.17167218379874e-06, - "loss": 0.7201, + "epoch": 0.14, + "grad_norm": 2.335556065755507, + "learning_rate": 9.675566903536458e-06, + "loss": 0.5406, "step": 1999 }, { - "epoch": 0.21, - "grad_norm": 2.311361411354398, - "learning_rate": 9.170732465534062e-06, - "loss": 0.7375, + "epoch": 0.14, + "grad_norm": 1.9993611534751266, + "learning_rate": 9.675159574287989e-06, + "loss": 0.6308, "step": 2000 }, { - "epoch": 0.21, - "grad_norm": 2.2786653146755618, - "learning_rate": 9.169792262725744e-06, - "loss": 0.7097, + "epoch": 0.14, + "grad_norm": 2.0519278690989444, + "learning_rate": 9.674751998081748e-06, + "loss": 0.5789, "step": 2001 }, { - "epoch": 0.21, - "grad_norm": 2.107809653312181, - "learning_rate": 9.168851575483013e-06, - "loss": 0.6939, + "epoch": 0.14, + "grad_norm": 1.7465686150660946, + "learning_rate": 9.674344174939268e-06, + "loss": 0.6299, "step": 2002 }, { - "epoch": 0.21, - "grad_norm": 2.057099189874853, - "learning_rate": 9.167910403915157e-06, - "loss": 0.7254, + "epoch": 0.14, + "grad_norm": 1.6411894511358684, + "learning_rate": 9.673936104882089e-06, + "loss": 0.6229, "step": 2003 }, { - "epoch": 0.21, - "grad_norm": 2.445940571938724, - "learning_rate": 9.16696874813152e-06, - "loss": 0.6684, + "epoch": 0.14, + "grad_norm": 1.6629277508832108, + "learning_rate": 9.673527787931767e-06, + "loss": 0.5698, "step": 2004 }, { - "epoch": 0.21, - "grad_norm": 2.4846164924547747, - "learning_rate": 9.166026608241496e-06, - "loss": 0.6799, + "epoch": 0.14, + "grad_norm": 4.405637540054362, + "learning_rate": 9.673119224109872e-06, + "loss": 0.6338, "step": 2005 }, { - "epoch": 0.21, - "grad_norm": 2.2002524931768814, - "learning_rate": 9.165083984354545e-06, - "loss": 0.7163, + "epoch": 0.14, + "grad_norm": 1.0150111830508741, + "learning_rate": 9.672710413437986e-06, + "loss": 0.482, "step": 2006 }, { - "epoch": 0.21, - "grad_norm": 3.2128685263981924, - "learning_rate": 9.164140876580179e-06, - "loss": 0.6353, + "epoch": 0.14, + "grad_norm": 1.0617942680539303, + "learning_rate": 9.672301355937702e-06, + "loss": 0.4883, "step": 2007 }, { - "epoch": 0.21, - "grad_norm": 2.487800260585636, - "learning_rate": 9.16319728502796e-06, - "loss": 0.7001, + "epoch": 0.14, + "grad_norm": 1.6566950913112255, + "learning_rate": 9.671892051630627e-06, + "loss": 0.5386, "step": 2008 }, { - "epoch": 0.21, - "grad_norm": 2.3851629284461553, - "learning_rate": 9.162253209807517e-06, - "loss": 0.6288, + "epoch": 0.14, + "grad_norm": 1.8355469645237559, + "learning_rate": 9.671482500538384e-06, + "loss": 0.5196, "step": 2009 }, { - "epoch": 0.21, - "grad_norm": 2.4232158597086744, - "learning_rate": 9.161308651028527e-06, - "loss": 0.7754, + "epoch": 0.14, + "grad_norm": 1.5686807144800414, + "learning_rate": 9.671072702682607e-06, + "loss": 0.5852, "step": 2010 }, { - "epoch": 0.21, - "grad_norm": 2.154013851517835, - "learning_rate": 9.160363608800728e-06, - "loss": 0.7117, + "epoch": 0.14, + "grad_norm": 2.0859698506673254, + "learning_rate": 9.670662658084942e-06, + "loss": 0.5946, "step": 2011 }, { - "epoch": 0.21, - "grad_norm": 3.896276107997591, - "learning_rate": 9.159418083233911e-06, - "loss": 0.6859, + "epoch": 0.14, + "grad_norm": 2.072754678346567, + "learning_rate": 9.670252366767048e-06, + "loss": 0.6484, "step": 2012 }, { - "epoch": 0.21, - "grad_norm": 2.44486977130411, - "learning_rate": 9.158472074437923e-06, - "loss": 0.6467, + "epoch": 0.14, + "grad_norm": 1.8979910171079, + "learning_rate": 9.669841828750599e-06, + "loss": 0.6276, "step": 2013 }, { - "epoch": 0.21, - "grad_norm": 2.0090988720613, - "learning_rate": 9.157525582522673e-06, - "loss": 0.6484, + "epoch": 0.14, + "grad_norm": 1.655523270611167, + "learning_rate": 9.669431044057281e-06, + "loss": 0.5377, "step": 2014 }, { - "epoch": 0.21, - "grad_norm": 2.34089707383189, - "learning_rate": 9.156578607598118e-06, - "loss": 0.7049, + "epoch": 0.14, + "grad_norm": 2.2036442088972494, + "learning_rate": 9.669020012708794e-06, + "loss": 0.6371, "step": 2015 }, { - "epoch": 0.21, - "grad_norm": 1.2880641379260889, - "learning_rate": 9.155631149774276e-06, - "loss": 0.6175, + "epoch": 0.14, + "grad_norm": 2.2666401151727156, + "learning_rate": 9.668608734726849e-06, + "loss": 0.5803, "step": 2016 }, { - "epoch": 0.21, - "grad_norm": 2.229526829371313, - "learning_rate": 9.15468320916122e-06, - "loss": 0.6662, + "epoch": 0.14, + "grad_norm": 1.5142995922854994, + "learning_rate": 9.66819721013317e-06, + "loss": 0.4952, "step": 2017 }, { - "epoch": 0.21, - "grad_norm": 2.156990061837107, - "learning_rate": 9.153734785869077e-06, - "loss": 0.7708, + "epoch": 0.14, + "grad_norm": 1.7866257040591784, + "learning_rate": 9.667785438949497e-06, + "loss": 0.5539, "step": 2018 }, { - "epoch": 0.21, - "grad_norm": 1.0523799529588602, - "learning_rate": 9.152785880008035e-06, - "loss": 0.6244, + "epoch": 0.14, + "grad_norm": 1.6797978213914093, + "learning_rate": 9.667373421197581e-06, + "loss": 0.5854, "step": 2019 }, { - "epoch": 0.21, - "grad_norm": 7.993605252322678, - "learning_rate": 9.151836491688334e-06, - "loss": 0.6563, + "epoch": 0.14, + "grad_norm": 1.6707753347321554, + "learning_rate": 9.666961156899187e-06, + "loss": 0.5768, "step": 2020 }, { - "epoch": 0.21, - "grad_norm": 2.3078838508848922, - "learning_rate": 9.15088662102027e-06, - "loss": 0.6904, + "epoch": 0.14, + "grad_norm": 1.5851976861156922, + "learning_rate": 9.666548646076087e-06, + "loss": 0.5768, "step": 2021 }, { - "epoch": 0.21, - "grad_norm": 3.063801331956025, - "learning_rate": 9.149936268114199e-06, - "loss": 0.676, + "epoch": 0.14, + "grad_norm": 1.668168456898025, + "learning_rate": 9.666135888750078e-06, + "loss": 0.5441, "step": 2022 }, { - "epoch": 0.21, - "grad_norm": 2.295192468843143, - "learning_rate": 9.148985433080528e-06, - "loss": 0.6834, + "epoch": 0.14, + "grad_norm": 1.5251100633394035, + "learning_rate": 9.66572288494296e-06, + "loss": 0.628, "step": 2023 }, { - "epoch": 0.21, - "grad_norm": 2.0564302867231383, - "learning_rate": 9.148034116029723e-06, - "loss": 0.704, + "epoch": 0.14, + "grad_norm": 1.903974018376592, + "learning_rate": 9.665309634676551e-06, + "loss": 0.6056, "step": 2024 }, { - "epoch": 0.21, - "grad_norm": 2.5228740207896085, - "learning_rate": 9.147082317072305e-06, - "loss": 0.6683, + "epoch": 0.14, + "grad_norm": 2.0505935745099517, + "learning_rate": 9.664896137972677e-06, + "loss": 0.5322, "step": 2025 }, { - "epoch": 0.21, - "grad_norm": 2.7052297386153015, - "learning_rate": 9.146130036318853e-06, - "loss": 0.7911, + "epoch": 0.14, + "grad_norm": 1.9603167205898366, + "learning_rate": 9.664482394853181e-06, + "loss": 0.5453, "step": 2026 }, { - "epoch": 0.21, - "grad_norm": 2.601417655514352, - "learning_rate": 9.145177273879995e-06, - "loss": 0.6382, + "epoch": 0.14, + "grad_norm": 1.8135453717769674, + "learning_rate": 9.664068405339921e-06, + "loss": 0.6179, "step": 2027 }, { - "epoch": 0.21, - "grad_norm": 1.9751425113181098, - "learning_rate": 9.144224029866426e-06, - "loss": 0.6693, + "epoch": 0.14, + "grad_norm": 1.6197926221297367, + "learning_rate": 9.663654169454765e-06, + "loss": 0.5436, "step": 2028 }, { - "epoch": 0.21, - "grad_norm": 2.5895232014896266, - "learning_rate": 9.14327030438889e-06, - "loss": 0.6712, + "epoch": 0.14, + "grad_norm": 1.5234027741473628, + "learning_rate": 9.663239687219591e-06, + "loss": 0.5901, "step": 2029 }, { - "epoch": 0.21, - "grad_norm": 3.747457510645605, - "learning_rate": 9.142316097558185e-06, - "loss": 0.6682, + "epoch": 0.14, + "grad_norm": 1.7724826769939195, + "learning_rate": 9.662824958656297e-06, + "loss": 0.6304, "step": 2030 }, { - "epoch": 0.21, - "grad_norm": 2.220857892024144, - "learning_rate": 9.14136140948517e-06, - "loss": 0.7372, + "epoch": 0.14, + "grad_norm": 1.5036064684291701, + "learning_rate": 9.662409983786788e-06, + "loss": 0.5659, "step": 2031 }, { - "epoch": 0.21, - "grad_norm": 2.3266528015138386, - "learning_rate": 9.14040624028076e-06, - "loss": 0.7786, + "epoch": 0.14, + "grad_norm": 1.9924638243978485, + "learning_rate": 9.661994762632985e-06, + "loss": 0.5839, "step": 2032 }, { - "epoch": 0.21, - "grad_norm": 2.348078882997178, - "learning_rate": 9.13945059005592e-06, - "loss": 0.7961, + "epoch": 0.14, + "grad_norm": 1.7522431124786204, + "learning_rate": 9.661579295216823e-06, + "loss": 0.4999, "step": 2033 }, { - "epoch": 0.21, - "grad_norm": 2.81669382916974, - "learning_rate": 9.138494458921676e-06, - "loss": 0.6599, + "epoch": 0.14, + "grad_norm": 1.436011301037939, + "learning_rate": 9.661163581560247e-06, + "loss": 0.5881, "step": 2034 }, { - "epoch": 0.21, - "grad_norm": 2.170306767141564, - "learning_rate": 9.137537846989111e-06, - "loss": 0.6299, + "epoch": 0.14, + "grad_norm": 1.6021055598775995, + "learning_rate": 9.660747621685214e-06, + "loss": 0.5803, "step": 2035 }, { - "epoch": 0.21, - "grad_norm": 2.5696842899169385, - "learning_rate": 9.136580754369357e-06, - "loss": 0.6784, + "epoch": 0.14, + "grad_norm": 2.1006013924281155, + "learning_rate": 9.660331415613701e-06, + "loss": 0.6104, "step": 2036 }, { - "epoch": 0.21, - "grad_norm": 2.3739145799623045, - "learning_rate": 9.135623181173609e-06, - "loss": 0.7959, + "epoch": 0.14, + "grad_norm": 1.6350316264620368, + "learning_rate": 9.659914963367692e-06, + "loss": 0.6015, "step": 2037 }, { - "epoch": 0.21, - "grad_norm": 2.3501976834313174, - "learning_rate": 9.134665127513116e-06, - "loss": 0.6405, + "epoch": 0.14, + "grad_norm": 1.5371917609626466, + "learning_rate": 9.659498264969183e-06, + "loss": 0.5337, "step": 2038 }, { - "epoch": 0.21, - "grad_norm": 2.7821580766766676, - "learning_rate": 9.133706593499181e-06, - "loss": 0.7582, + "epoch": 0.14, + "grad_norm": 2.6581339675955853, + "learning_rate": 9.659081320440187e-06, + "loss": 0.5661, "step": 2039 }, { - "epoch": 0.21, - "grad_norm": 2.0217636722769323, - "learning_rate": 9.132747579243163e-06, - "loss": 0.7176, + "epoch": 0.14, + "grad_norm": 1.5231811415838228, + "learning_rate": 9.658664129802728e-06, + "loss": 0.5841, "step": 2040 }, { - "epoch": 0.21, - "grad_norm": 2.149948219126222, - "learning_rate": 9.131788084856477e-06, - "loss": 0.7303, + "epoch": 0.14, + "grad_norm": 1.6672454895742175, + "learning_rate": 9.658246693078845e-06, + "loss": 0.5412, "step": 2041 }, { - "epoch": 0.21, - "grad_norm": 2.279544855221473, - "learning_rate": 9.130828110450593e-06, - "loss": 0.6588, + "epoch": 0.14, + "grad_norm": 1.6334012290134465, + "learning_rate": 9.657829010290588e-06, + "loss": 0.569, "step": 2042 }, { - "epoch": 0.21, - "grad_norm": 2.49303969504543, - "learning_rate": 9.129867656137044e-06, - "loss": 0.7645, + "epoch": 0.14, + "grad_norm": 1.6602505191534778, + "learning_rate": 9.65741108146002e-06, + "loss": 0.5885, "step": 2043 }, { - "epoch": 0.22, - "grad_norm": 2.143115712697355, - "learning_rate": 9.128906722027406e-06, - "loss": 0.6674, + "epoch": 0.15, + "grad_norm": 1.6446863770357623, + "learning_rate": 9.656992906609215e-06, + "loss": 0.5533, "step": 2044 }, { - "epoch": 0.22, - "grad_norm": 2.0564042062634225, - "learning_rate": 9.127945308233322e-06, - "loss": 0.7074, + "epoch": 0.15, + "grad_norm": 1.6364036088818459, + "learning_rate": 9.656574485760267e-06, + "loss": 0.581, "step": 2045 }, { - "epoch": 0.22, - "grad_norm": 2.147223116012419, - "learning_rate": 9.126983414866486e-06, - "loss": 0.7134, + "epoch": 0.15, + "grad_norm": 1.5983674236471037, + "learning_rate": 9.656155818935275e-06, + "loss": 0.571, "step": 2046 }, { - "epoch": 0.22, - "grad_norm": 2.272635221500622, - "learning_rate": 9.126021042038644e-06, - "loss": 0.7515, + "epoch": 0.15, + "grad_norm": 1.7967262931593406, + "learning_rate": 9.655736906156355e-06, + "loss": 0.5308, "step": 2047 }, { - "epoch": 0.22, - "grad_norm": 2.7298434180359954, - "learning_rate": 9.125058189861607e-06, - "loss": 0.7381, + "epoch": 0.15, + "grad_norm": 1.821770562664974, + "learning_rate": 9.655317747445636e-06, + "loss": 0.5404, "step": 2048 }, { - "epoch": 0.22, - "grad_norm": 3.0486731591766913, - "learning_rate": 9.124094858447233e-06, - "loss": 0.6821, + "epoch": 0.15, + "grad_norm": 1.644145447688416, + "learning_rate": 9.654898342825261e-06, + "loss": 0.5376, "step": 2049 }, { - "epoch": 0.22, - "grad_norm": 4.6588169832809205, - "learning_rate": 9.123131047907439e-06, - "loss": 0.8302, + "epoch": 0.15, + "grad_norm": 1.9126759109702718, + "learning_rate": 9.65447869231738e-06, + "loss": 0.5682, "step": 2050 }, { - "epoch": 0.22, - "grad_norm": 2.07023150767486, - "learning_rate": 9.122166758354199e-06, - "loss": 0.6876, + "epoch": 0.15, + "grad_norm": 1.660477788633002, + "learning_rate": 9.654058795944165e-06, + "loss": 0.503, "step": 2051 }, { - "epoch": 0.22, - "grad_norm": 2.7807238495464133, - "learning_rate": 9.12120198989954e-06, - "loss": 0.5919, + "epoch": 0.15, + "grad_norm": 2.6663209914515105, + "learning_rate": 9.653638653727793e-06, + "loss": 0.5484, "step": 2052 }, { - "epoch": 0.22, - "grad_norm": 2.1579164615666837, - "learning_rate": 9.120236742655548e-06, - "loss": 0.7351, + "epoch": 0.15, + "grad_norm": 1.748013609153089, + "learning_rate": 9.653218265690458e-06, + "loss": 0.5833, "step": 2053 }, { - "epoch": 0.22, - "grad_norm": 2.5527053709438214, - "learning_rate": 9.11927101673436e-06, - "loss": 0.6993, + "epoch": 0.15, + "grad_norm": 1.7319528083631415, + "learning_rate": 9.65279763185437e-06, + "loss": 0.5658, "step": 2054 }, { - "epoch": 0.22, - "grad_norm": 2.261502991172867, - "learning_rate": 9.118304812248177e-06, - "loss": 0.6502, + "epoch": 0.15, + "grad_norm": 1.6805316558904804, + "learning_rate": 9.652376752241743e-06, + "loss": 0.5812, "step": 2055 }, { - "epoch": 0.22, - "grad_norm": 3.242461395330852, - "learning_rate": 9.117338129309243e-06, - "loss": 0.6605, + "epoch": 0.15, + "grad_norm": 1.66376613805248, + "learning_rate": 9.651955626874812e-06, + "loss": 0.6364, "step": 2056 }, { - "epoch": 0.22, - "grad_norm": 1.914414634765642, - "learning_rate": 9.116370968029867e-06, - "loss": 0.6981, + "epoch": 0.15, + "grad_norm": 2.0905530261252876, + "learning_rate": 9.65153425577582e-06, + "loss": 0.596, "step": 2057 }, { - "epoch": 0.22, - "grad_norm": 2.344979314174602, - "learning_rate": 9.115403328522412e-06, - "loss": 0.7227, + "epoch": 0.15, + "grad_norm": 1.6155690893284218, + "learning_rate": 9.651112638967028e-06, + "loss": 0.6167, "step": 2058 }, { - "epoch": 0.22, - "grad_norm": 2.191238107674176, - "learning_rate": 9.114435210899296e-06, - "loss": 0.6631, + "epoch": 0.15, + "grad_norm": 1.5356914287380137, + "learning_rate": 9.650690776470708e-06, + "loss": 0.5808, "step": 2059 }, { - "epoch": 0.22, - "grad_norm": 2.4602672291177528, - "learning_rate": 9.113466615272988e-06, - "loss": 0.7243, + "epoch": 0.15, + "grad_norm": 1.9078423973083523, + "learning_rate": 9.650268668309142e-06, + "loss": 0.5374, "step": 2060 }, { - "epoch": 0.22, - "grad_norm": 1.44867772558428, - "learning_rate": 9.11249754175602e-06, - "loss": 0.6982, + "epoch": 0.15, + "grad_norm": 1.7124192012946258, + "learning_rate": 9.649846314504627e-06, + "loss": 0.5486, "step": 2061 }, { - "epoch": 0.22, - "grad_norm": 2.651963613477152, - "learning_rate": 9.111527990460977e-06, - "loss": 0.7013, + "epoch": 0.15, + "grad_norm": 1.82062343192843, + "learning_rate": 9.649423715079474e-06, + "loss": 0.6086, "step": 2062 }, { - "epoch": 0.22, - "grad_norm": 2.5214633687617325, - "learning_rate": 9.110557961500496e-06, - "loss": 0.6273, + "epoch": 0.15, + "grad_norm": 1.498429846532035, + "learning_rate": 9.649000870056004e-06, + "loss": 0.5706, "step": 2063 }, { - "epoch": 0.22, - "grad_norm": 2.1745678771179233, - "learning_rate": 9.109587454987274e-06, - "loss": 0.6482, + "epoch": 0.15, + "grad_norm": 2.1568399675893093, + "learning_rate": 9.648577779456559e-06, + "loss": 0.5738, "step": 2064 }, { - "epoch": 0.22, - "grad_norm": 2.489922591200943, - "learning_rate": 9.108616471034061e-06, - "loss": 0.6609, + "epoch": 0.15, + "grad_norm": 1.0699549739599807, + "learning_rate": 9.648154443303481e-06, + "loss": 0.4914, "step": 2065 }, { - "epoch": 0.22, - "grad_norm": 2.0973955866357814, - "learning_rate": 9.107645009753663e-06, - "loss": 0.6722, + "epoch": 0.15, + "grad_norm": 1.8142495445685458, + "learning_rate": 9.647730861619137e-06, + "loss": 0.6066, "step": 2066 }, { - "epoch": 0.22, - "grad_norm": 1.9996934192215, - "learning_rate": 9.106673071258942e-06, - "loss": 0.7301, + "epoch": 0.15, + "grad_norm": 1.85639134074057, + "learning_rate": 9.647307034425898e-06, + "loss": 0.6192, "step": 2067 }, { - "epoch": 0.22, - "grad_norm": 2.494129615110545, - "learning_rate": 9.105700655662815e-06, - "loss": 0.6926, + "epoch": 0.15, + "grad_norm": 0.8748757760257014, + "learning_rate": 9.646882961746157e-06, + "loss": 0.4823, "step": 2068 }, { - "epoch": 0.22, - "grad_norm": 11.264357578268509, - "learning_rate": 9.104727763078253e-06, - "loss": 0.7061, + "epoch": 0.15, + "grad_norm": 1.5680903256415388, + "learning_rate": 9.646458643602311e-06, + "loss": 0.5745, "step": 2069 }, { - "epoch": 0.22, - "grad_norm": 2.8039561431694544, - "learning_rate": 9.103754393618287e-06, - "loss": 0.6591, + "epoch": 0.15, + "grad_norm": 1.4829305536462372, + "learning_rate": 9.646034080016775e-06, + "loss": 0.5856, "step": 2070 }, { - "epoch": 0.22, - "grad_norm": 2.2260708554024737, - "learning_rate": 9.102780547395997e-06, - "loss": 0.7733, + "epoch": 0.15, + "grad_norm": 1.8330741506963368, + "learning_rate": 9.645609271011975e-06, + "loss": 0.6381, "step": 2071 }, { - "epoch": 0.22, - "grad_norm": 2.6838150314170997, - "learning_rate": 9.101806224524524e-06, - "loss": 0.7586, + "epoch": 0.15, + "grad_norm": 2.5936991764920876, + "learning_rate": 9.645184216610353e-06, + "loss": 0.5583, "step": 2072 }, { - "epoch": 0.22, - "grad_norm": 3.5259424740449896, - "learning_rate": 9.10083142511706e-06, - "loss": 0.6863, + "epoch": 0.15, + "grad_norm": 1.7679033702681357, + "learning_rate": 9.644758916834362e-06, + "loss": 0.5508, "step": 2073 }, { - "epoch": 0.22, - "grad_norm": 1.894372573153219, - "learning_rate": 9.099856149286857e-06, - "loss": 0.6572, + "epoch": 0.15, + "grad_norm": 1.636242475268784, + "learning_rate": 9.644333371706465e-06, + "loss": 0.588, "step": 2074 }, { - "epoch": 0.22, - "grad_norm": 3.246841628686618, - "learning_rate": 9.098880397147215e-06, - "loss": 0.6908, + "epoch": 0.15, + "grad_norm": 1.58409404352087, + "learning_rate": 9.643907581249142e-06, + "loss": 0.6201, "step": 2075 }, { - "epoch": 0.22, - "grad_norm": 3.039572983541654, - "learning_rate": 9.0979041688115e-06, - "loss": 0.6255, + "epoch": 0.15, + "grad_norm": 1.0402329938802013, + "learning_rate": 9.643481545484884e-06, + "loss": 0.4556, "step": 2076 }, { - "epoch": 0.22, - "grad_norm": 1.5644692341003028, - "learning_rate": 9.096927464393123e-06, - "loss": 0.6359, + "epoch": 0.15, + "grad_norm": 1.692053828629462, + "learning_rate": 9.643055264436198e-06, + "loss": 0.6071, "step": 2077 }, { - "epoch": 0.22, - "grad_norm": 2.305502204546247, - "learning_rate": 9.095950284005557e-06, - "loss": 0.7748, + "epoch": 0.15, + "grad_norm": 2.2754445985078755, + "learning_rate": 9.6426287381256e-06, + "loss": 0.6024, "step": 2078 }, { - "epoch": 0.22, - "grad_norm": 2.2388140399919334, - "learning_rate": 9.094972627762326e-06, - "loss": 0.7147, + "epoch": 0.15, + "grad_norm": 1.690469348086785, + "learning_rate": 9.64220196657562e-06, + "loss": 0.5908, "step": 2079 }, { - "epoch": 0.22, - "grad_norm": 2.137643134632365, - "learning_rate": 9.093994495777014e-06, - "loss": 0.7239, + "epoch": 0.15, + "grad_norm": 1.5783069100901368, + "learning_rate": 9.641774949808802e-06, + "loss": 0.5091, "step": 2080 }, { - "epoch": 0.22, - "grad_norm": 2.166788507712685, - "learning_rate": 9.093015888163255e-06, - "loss": 0.7105, + "epoch": 0.15, + "grad_norm": 1.8049987864415111, + "learning_rate": 9.641347687847703e-06, + "loss": 0.5436, "step": 2081 }, { - "epoch": 0.22, - "grad_norm": 2.811849358016705, - "learning_rate": 9.09203680503474e-06, - "loss": 0.6821, + "epoch": 0.15, + "grad_norm": 0.8545483194862561, + "learning_rate": 9.640920180714892e-06, + "loss": 0.4694, "step": 2082 }, { - "epoch": 0.22, - "grad_norm": 2.196127603607177, - "learning_rate": 9.091057246505221e-06, - "loss": 0.7968, + "epoch": 0.15, + "grad_norm": 0.8260283885519006, + "learning_rate": 9.640492428432953e-06, + "loss": 0.4745, "step": 2083 }, { - "epoch": 0.22, - "grad_norm": 9.922784029473444, - "learning_rate": 9.090077212688496e-06, - "loss": 0.7587, + "epoch": 0.15, + "grad_norm": 1.6222758755214932, + "learning_rate": 9.640064431024478e-06, + "loss": 0.606, "step": 2084 }, { - "epoch": 0.22, - "grad_norm": 2.039930805628341, - "learning_rate": 9.089096703698423e-06, - "loss": 0.6852, + "epoch": 0.15, + "grad_norm": 1.7979913433033554, + "learning_rate": 9.639636188512077e-06, + "loss": 0.5864, "step": 2085 }, { - "epoch": 0.22, - "grad_norm": 30.95824912540301, - "learning_rate": 9.088115719648917e-06, - "loss": 0.7022, + "epoch": 0.15, + "grad_norm": 1.9189532786546784, + "learning_rate": 9.639207700918371e-06, + "loss": 0.5791, "step": 2086 }, { - "epoch": 0.22, - "grad_norm": 2.181623054651788, - "learning_rate": 9.087134260653943e-06, - "loss": 0.6246, + "epoch": 0.15, + "grad_norm": 1.5630318936966947, + "learning_rate": 9.638778968265996e-06, + "loss": 0.5754, "step": 2087 }, { - "epoch": 0.22, - "grad_norm": 1.9997909852846925, - "learning_rate": 9.086152326827527e-06, - "loss": 0.6697, + "epoch": 0.15, + "grad_norm": 2.3730909439619365, + "learning_rate": 9.638349990577596e-06, + "loss": 0.6084, "step": 2088 }, { - "epoch": 0.22, - "grad_norm": 2.395290205085553, - "learning_rate": 9.085169918283744e-06, - "loss": 0.6567, + "epoch": 0.15, + "grad_norm": 0.8536363889330592, + "learning_rate": 9.637920767875834e-06, + "loss": 0.4393, "step": 2089 }, { - "epoch": 0.22, - "grad_norm": 2.836594783866522, - "learning_rate": 9.084187035136727e-06, - "loss": 0.7324, + "epoch": 0.15, + "grad_norm": 1.4656946589000912, + "learning_rate": 9.637491300183382e-06, + "loss": 0.5616, "step": 2090 }, { - "epoch": 0.22, - "grad_norm": 2.3072720294313496, - "learning_rate": 9.08320367750067e-06, - "loss": 0.6529, + "epoch": 0.15, + "grad_norm": 1.8727987670906867, + "learning_rate": 9.637061587522924e-06, + "loss": 0.57, "step": 2091 }, { - "epoch": 0.22, - "grad_norm": 1.8931936091262302, - "learning_rate": 9.08221984548981e-06, - "loss": 0.7267, + "epoch": 0.15, + "grad_norm": 2.2372234488022182, + "learning_rate": 9.636631629917161e-06, + "loss": 0.5531, "step": 2092 }, { - "epoch": 0.22, - "grad_norm": 2.4682881700848234, - "learning_rate": 9.081235539218451e-06, - "loss": 0.6981, + "epoch": 0.15, + "grad_norm": 1.8286970423887974, + "learning_rate": 9.636201427388805e-06, + "loss": 0.602, "step": 2093 }, { - "epoch": 0.22, - "grad_norm": 2.1040997554296026, - "learning_rate": 9.080250758800944e-06, - "loss": 0.7395, + "epoch": 0.15, + "grad_norm": 1.6313497419588237, + "learning_rate": 9.635770979960579e-06, + "loss": 0.6238, "step": 2094 }, { - "epoch": 0.22, - "grad_norm": 2.148229839051349, - "learning_rate": 9.0792655043517e-06, - "loss": 0.6395, + "epoch": 0.15, + "grad_norm": 1.7222762043488424, + "learning_rate": 9.635340287655222e-06, + "loss": 0.5491, "step": 2095 }, { - "epoch": 0.22, - "grad_norm": 2.008482369488449, - "learning_rate": 9.078279775985179e-06, - "loss": 0.665, + "epoch": 0.15, + "grad_norm": 1.7784170256613303, + "learning_rate": 9.634909350495486e-06, + "loss": 0.5248, "step": 2096 }, { - "epoch": 0.22, - "grad_norm": 2.414670215539318, - "learning_rate": 9.077293573815905e-06, - "loss": 0.7037, + "epoch": 0.15, + "grad_norm": 1.8970225157715686, + "learning_rate": 9.634478168504129e-06, + "loss": 0.5685, "step": 2097 }, { - "epoch": 0.22, - "grad_norm": 2.6045420628577474, - "learning_rate": 9.07630689795845e-06, - "loss": 0.6876, + "epoch": 0.15, + "grad_norm": 1.4375833981504196, + "learning_rate": 9.634046741703935e-06, + "loss": 0.5383, "step": 2098 }, { - "epoch": 0.22, - "grad_norm": 3.089544444738824, - "learning_rate": 9.075319748527442e-06, - "loss": 0.6634, + "epoch": 0.15, + "grad_norm": 2.482557236287892, + "learning_rate": 9.633615070117687e-06, + "loss": 0.6019, "step": 2099 }, { - "epoch": 0.22, - "grad_norm": 7.068471813524876, - "learning_rate": 9.074332125637564e-06, - "loss": 0.7743, + "epoch": 0.15, + "grad_norm": 1.5366986101767295, + "learning_rate": 9.633183153768193e-06, + "loss": 0.5695, "step": 2100 }, { - "epoch": 0.22, - "grad_norm": 10.390556215843803, - "learning_rate": 9.073344029403562e-06, - "loss": 0.6952, + "epoch": 0.15, + "grad_norm": 1.6071249771554728, + "learning_rate": 9.632750992678264e-06, + "loss": 0.5275, "step": 2101 }, { - "epoch": 0.22, - "grad_norm": 2.5241030806585005, - "learning_rate": 9.072355459940222e-06, - "loss": 0.7437, + "epoch": 0.15, + "grad_norm": 1.6934258327488794, + "learning_rate": 9.63231858687073e-06, + "loss": 0.608, "step": 2102 }, { - "epoch": 0.22, - "grad_norm": 3.752003680927352, - "learning_rate": 9.071366417362398e-06, - "loss": 0.7585, + "epoch": 0.15, + "grad_norm": 2.1247468322140817, + "learning_rate": 9.63188593636843e-06, + "loss": 0.595, "step": 2103 }, { - "epoch": 0.22, - "grad_norm": 2.5306889032364492, - "learning_rate": 9.070376901784992e-06, - "loss": 0.7722, + "epoch": 0.15, + "grad_norm": 6.776780103002606, + "learning_rate": 9.631453041194222e-06, + "loss": 0.4867, "step": 2104 }, { - "epoch": 0.22, - "grad_norm": 2.5152078389519015, - "learning_rate": 9.069386913322964e-06, - "loss": 0.7526, + "epoch": 0.15, + "grad_norm": 1.7197899600025235, + "learning_rate": 9.63101990137097e-06, + "loss": 0.5148, "step": 2105 }, { - "epoch": 0.22, - "grad_norm": 2.300437974231803, - "learning_rate": 9.068396452091328e-06, - "loss": 0.7412, + "epoch": 0.15, + "grad_norm": 1.6054649878644405, + "learning_rate": 9.630586516921557e-06, + "loss": 0.5827, "step": 2106 }, { - "epoch": 0.22, - "grad_norm": 2.966239911106796, - "learning_rate": 9.067405518205153e-06, - "loss": 0.6955, + "epoch": 0.15, + "grad_norm": 2.2956106326330814, + "learning_rate": 9.63015288786887e-06, + "loss": 0.5844, "step": 2107 }, { - "epoch": 0.22, - "grad_norm": 2.374383053660284, - "learning_rate": 9.066414111779562e-06, - "loss": 0.7065, + "epoch": 0.15, + "grad_norm": 2.2314178699409575, + "learning_rate": 9.629719014235819e-06, + "loss": 0.6091, "step": 2108 }, { - "epoch": 0.22, - "grad_norm": 1.95341575255716, - "learning_rate": 9.065422232929735e-06, - "loss": 0.6911, + "epoch": 0.15, + "grad_norm": 2.0993559205520014, + "learning_rate": 9.629284896045325e-06, + "loss": 0.5662, "step": 2109 }, { - "epoch": 0.22, - "grad_norm": 1.38959249885153, - "learning_rate": 9.064429881770905e-06, - "loss": 0.5927, + "epoch": 0.15, + "grad_norm": 1.5612362591715663, + "learning_rate": 9.628850533320314e-06, + "loss": 0.6219, "step": 2110 }, { - "epoch": 0.22, - "grad_norm": 2.354874954642544, - "learning_rate": 9.063437058418361e-06, - "loss": 0.6709, + "epoch": 0.15, + "grad_norm": 1.580686264506666, + "learning_rate": 9.628415926083734e-06, + "loss": 0.5884, "step": 2111 }, { - "epoch": 0.22, - "grad_norm": 3.1182596136285308, - "learning_rate": 9.062443762987442e-06, - "loss": 0.7256, + "epoch": 0.15, + "grad_norm": 1.7302996820413585, + "learning_rate": 9.62798107435854e-06, + "loss": 0.609, "step": 2112 }, { - "epoch": 0.22, - "grad_norm": 2.3451511992294085, - "learning_rate": 9.061449995593554e-06, - "loss": 0.6163, + "epoch": 0.15, + "grad_norm": 1.5580120318008182, + "learning_rate": 9.627545978167707e-06, + "loss": 0.6051, "step": 2113 }, { - "epoch": 0.22, - "grad_norm": 2.6148189214617816, - "learning_rate": 9.060455756352144e-06, - "loss": 0.7164, + "epoch": 0.15, + "grad_norm": 1.6924941956661084, + "learning_rate": 9.627110637534212e-06, + "loss": 0.5084, "step": 2114 }, { - "epoch": 0.22, - "grad_norm": 2.984293021281382, - "learning_rate": 9.059461045378723e-06, - "loss": 0.5894, + "epoch": 0.15, + "grad_norm": 2.3619016009512355, + "learning_rate": 9.626675052481057e-06, + "loss": 0.531, "step": 2115 }, { - "epoch": 0.22, - "grad_norm": 2.254747474890875, - "learning_rate": 9.058465862788852e-06, - "loss": 0.6708, + "epoch": 0.15, + "grad_norm": 1.6599927704723276, + "learning_rate": 9.626239223031247e-06, + "loss": 0.5729, "step": 2116 }, { - "epoch": 0.22, - "grad_norm": 2.363609404179215, - "learning_rate": 9.05747020869815e-06, - "loss": 0.6888, + "epoch": 0.15, + "grad_norm": 1.6263928065969147, + "learning_rate": 9.625803149207807e-06, + "loss": 0.6004, "step": 2117 }, { - "epoch": 0.22, - "grad_norm": 2.37996717795513, - "learning_rate": 9.056474083222286e-06, - "loss": 0.5809, + "epoch": 0.15, + "grad_norm": 0.8881195059122535, + "learning_rate": 9.625366831033769e-06, + "loss": 0.4587, "step": 2118 }, { - "epoch": 0.22, - "grad_norm": 2.7306453255932848, - "learning_rate": 9.055477486476992e-06, - "loss": 0.7184, + "epoch": 0.15, + "grad_norm": 1.7408135069537287, + "learning_rate": 9.62493026853218e-06, + "loss": 0.5917, "step": 2119 }, { - "epoch": 0.22, - "grad_norm": 2.3108089867660713, - "learning_rate": 9.054480418578044e-06, - "loss": 0.6179, + "epoch": 0.15, + "grad_norm": 1.8939483269755766, + "learning_rate": 9.624493461726106e-06, + "loss": 0.6377, "step": 2120 }, { - "epoch": 0.22, - "grad_norm": 2.5666873554391665, - "learning_rate": 9.053482879641283e-06, - "loss": 0.701, + "epoch": 0.15, + "grad_norm": 1.581128405060929, + "learning_rate": 9.624056410638616e-06, + "loss": 0.5762, "step": 2121 }, { - "epoch": 0.22, - "grad_norm": 2.492602226878482, - "learning_rate": 9.052484869782597e-06, - "loss": 0.6191, + "epoch": 0.15, + "grad_norm": 1.6507236737470345, + "learning_rate": 9.623619115292798e-06, + "loss": 0.5547, "step": 2122 }, { - "epoch": 0.22, - "grad_norm": 9.660745248304112, - "learning_rate": 9.051486389117933e-06, - "loss": 0.6498, + "epoch": 0.15, + "grad_norm": 1.8455942656741242, + "learning_rate": 9.623181575711751e-06, + "loss": 0.5457, "step": 2123 }, { - "epoch": 0.22, - "grad_norm": 2.593579048719051, - "learning_rate": 9.050487437763294e-06, - "loss": 0.7411, + "epoch": 0.15, + "grad_norm": 1.5311010973267534, + "learning_rate": 9.62274379191859e-06, + "loss": 0.6303, "step": 2124 }, { - "epoch": 0.22, - "grad_norm": 2.193130270383422, - "learning_rate": 9.049488015834731e-06, - "loss": 0.6509, + "epoch": 0.15, + "grad_norm": 1.8221024399297454, + "learning_rate": 9.622305763936435e-06, + "loss": 0.6004, "step": 2125 }, { - "epoch": 0.22, - "grad_norm": 2.286545999733999, - "learning_rate": 9.048488123448357e-06, - "loss": 0.7333, + "epoch": 0.15, + "grad_norm": 1.8926679994701423, + "learning_rate": 9.621867491788429e-06, + "loss": 0.6378, "step": 2126 }, { - "epoch": 0.22, - "grad_norm": 2.7285901550002145, - "learning_rate": 9.047487760720338e-06, - "loss": 0.755, + "epoch": 0.15, + "grad_norm": 1.4911481752248694, + "learning_rate": 9.62142897549772e-06, + "loss": 0.5368, "step": 2127 }, { - "epoch": 0.22, - "grad_norm": 2.5323414148927963, - "learning_rate": 9.046486927766889e-06, - "loss": 0.6686, + "epoch": 0.15, + "grad_norm": 1.6313570042182557, + "learning_rate": 9.620990215087474e-06, + "loss": 0.5525, "step": 2128 }, { - "epoch": 0.22, - "grad_norm": 1.2528013077555502, - "learning_rate": 9.045485624704287e-06, - "loss": 0.6362, + "epoch": 0.15, + "grad_norm": 1.6236220596588629, + "learning_rate": 9.620551210580864e-06, + "loss": 0.6302, "step": 2129 }, { - "epoch": 0.22, - "grad_norm": 2.170043281237849, - "learning_rate": 9.044483851648858e-06, - "loss": 0.6531, + "epoch": 0.15, + "grad_norm": 1.5272536152165817, + "learning_rate": 9.620111962001085e-06, + "loss": 0.5727, "step": 2130 }, { - "epoch": 0.22, - "grad_norm": 2.537814352100077, - "learning_rate": 9.043481608716987e-06, - "loss": 0.7327, + "epoch": 0.15, + "grad_norm": 1.5305824802934582, + "learning_rate": 9.619672469371338e-06, + "loss": 0.6042, "step": 2131 }, { - "epoch": 0.22, - "grad_norm": 2.1536774455658683, - "learning_rate": 9.042478896025113e-06, - "loss": 0.737, + "epoch": 0.15, + "grad_norm": 1.6801400949636047, + "learning_rate": 9.619232732714836e-06, + "loss": 0.5557, "step": 2132 }, { - "epoch": 0.22, - "grad_norm": 2.4395488467277504, - "learning_rate": 9.041475713689725e-06, - "loss": 0.7198, + "epoch": 0.15, + "grad_norm": 9.648696701443642, + "learning_rate": 9.618792752054809e-06, + "loss": 0.5719, "step": 2133 }, { - "epoch": 0.22, - "grad_norm": 3.612160402864214, - "learning_rate": 9.04047206182737e-06, - "loss": 0.7047, + "epoch": 0.15, + "grad_norm": 1.9730159486097594, + "learning_rate": 9.618352527414498e-06, + "loss": 0.6492, "step": 2134 }, { - "epoch": 0.22, - "grad_norm": 2.361860087108175, - "learning_rate": 9.039467940554651e-06, - "loss": 0.8051, + "epoch": 0.15, + "grad_norm": 1.0281732099371197, + "learning_rate": 9.617912058817157e-06, + "loss": 0.481, "step": 2135 }, { - "epoch": 0.22, - "grad_norm": 1.1118384611691083, - "learning_rate": 9.038463349988226e-06, - "loss": 0.6651, + "epoch": 0.15, + "grad_norm": 2.009736177093827, + "learning_rate": 9.617471346286056e-06, + "loss": 0.5864, "step": 2136 }, { - "epoch": 0.22, - "grad_norm": 2.1279878773488656, - "learning_rate": 9.0374582902448e-06, - "loss": 0.7212, + "epoch": 0.15, + "grad_norm": 2.1903720771094797, + "learning_rate": 9.61703038984447e-06, + "loss": 0.5581, "step": 2137 }, { - "epoch": 0.22, - "grad_norm": 1.984425840534222, - "learning_rate": 9.036452761441143e-06, - "loss": 0.6806, + "epoch": 0.15, + "grad_norm": 1.9607948746207904, + "learning_rate": 9.616589189515696e-06, + "loss": 0.6081, "step": 2138 }, { - "epoch": 0.23, - "grad_norm": 2.2564393133187943, - "learning_rate": 9.035446763694073e-06, - "loss": 0.6898, + "epoch": 0.15, + "grad_norm": 0.896750028865289, + "learning_rate": 9.616147745323035e-06, + "loss": 0.4836, "step": 2139 }, { - "epoch": 0.23, - "grad_norm": 3.2675730698127854, - "learning_rate": 9.034440297120461e-06, - "loss": 0.6907, + "epoch": 0.15, + "grad_norm": 1.606011389402128, + "learning_rate": 9.61570605728981e-06, + "loss": 0.6121, "step": 2140 }, { - "epoch": 0.23, - "grad_norm": 3.1457055260967333, - "learning_rate": 9.03343336183724e-06, - "loss": 0.6275, + "epoch": 0.15, + "grad_norm": 1.5117191558580692, + "learning_rate": 9.615264125439351e-06, + "loss": 0.5957, "step": 2141 }, { - "epoch": 0.23, - "grad_norm": 2.699650466296311, - "learning_rate": 9.032425957961388e-06, - "loss": 0.7142, + "epoch": 0.15, + "grad_norm": 1.5649312330426122, + "learning_rate": 9.614821949795002e-06, + "loss": 0.5736, "step": 2142 }, { - "epoch": 0.23, - "grad_norm": 2.361799060850231, - "learning_rate": 9.031418085609946e-06, - "loss": 0.7878, + "epoch": 0.15, + "grad_norm": 1.8291966435336662, + "learning_rate": 9.614379530380122e-06, + "loss": 0.563, "step": 2143 }, { - "epoch": 0.23, - "grad_norm": 3.0889387826729044, - "learning_rate": 9.030409744900005e-06, - "loss": 0.7566, + "epoch": 0.15, + "grad_norm": 1.6712860530923126, + "learning_rate": 9.613936867218078e-06, + "loss": 0.5737, "step": 2144 }, { - "epoch": 0.23, - "grad_norm": 2.6149850709834492, - "learning_rate": 9.029400935948712e-06, - "loss": 0.6909, + "epoch": 0.15, + "grad_norm": 1.7177016092015445, + "learning_rate": 9.613493960332253e-06, + "loss": 0.5788, "step": 2145 }, { - "epoch": 0.23, - "grad_norm": 2.102880043260286, - "learning_rate": 9.028391658873264e-06, - "loss": 0.6655, + "epoch": 0.15, + "grad_norm": 2.0869939856829345, + "learning_rate": 9.613050809746048e-06, + "loss": 0.6407, "step": 2146 }, { - "epoch": 0.23, - "grad_norm": 2.6572624912551666, - "learning_rate": 9.027381913790916e-06, - "loss": 0.627, + "epoch": 0.15, + "grad_norm": 1.6920754054443645, + "learning_rate": 9.612607415482867e-06, + "loss": 0.5652, "step": 2147 }, { - "epoch": 0.23, - "grad_norm": 2.6045372231550585, - "learning_rate": 9.026371700818982e-06, - "loss": 0.779, + "epoch": 0.15, + "grad_norm": 1.9108186505814162, + "learning_rate": 9.61216377756613e-06, + "loss": 0.6298, "step": 2148 }, { - "epoch": 0.23, - "grad_norm": 2.662458429721062, - "learning_rate": 9.025361020074823e-06, - "loss": 0.7057, + "epoch": 0.15, + "grad_norm": 1.7331045141645243, + "learning_rate": 9.611719896019275e-06, + "loss": 0.5588, "step": 2149 }, { - "epoch": 0.23, - "grad_norm": 1.8868287840968536, - "learning_rate": 9.024349871675855e-06, - "loss": 0.7235, + "epoch": 0.15, + "grad_norm": 1.446690857447552, + "learning_rate": 9.611275770865751e-06, + "loss": 0.5253, "step": 2150 }, { - "epoch": 0.23, - "grad_norm": 2.2117676794317105, - "learning_rate": 9.023338255739553e-06, - "loss": 0.7088, + "epoch": 0.15, + "grad_norm": 1.653167462259553, + "learning_rate": 9.610831402129015e-06, + "loss": 0.5901, "step": 2151 }, { - "epoch": 0.23, - "grad_norm": 2.2014960756353057, - "learning_rate": 9.022326172383444e-06, - "loss": 0.7346, + "epoch": 0.15, + "grad_norm": 1.6084795754159313, + "learning_rate": 9.610386789832538e-06, + "loss": 0.6437, "step": 2152 }, { - "epoch": 0.23, - "grad_norm": 2.14684126625403, - "learning_rate": 9.021313621725106e-06, - "loss": 0.734, + "epoch": 0.15, + "grad_norm": 1.6096116843716342, + "learning_rate": 9.609941933999812e-06, + "loss": 0.5988, "step": 2153 }, { - "epoch": 0.23, - "grad_norm": 2.558372722059645, - "learning_rate": 9.020300603882178e-06, - "loss": 0.6929, + "epoch": 0.15, + "grad_norm": 1.6785044986328697, + "learning_rate": 9.60949683465433e-06, + "loss": 0.5695, "step": 2154 }, { - "epoch": 0.23, - "grad_norm": 2.442785125856375, - "learning_rate": 9.019287118972343e-06, - "loss": 0.7285, + "epoch": 0.15, + "grad_norm": 1.5446225595395824, + "learning_rate": 9.609051491819608e-06, + "loss": 0.6701, "step": 2155 }, { - "epoch": 0.23, - "grad_norm": 2.5672631424976617, - "learning_rate": 9.018273167113354e-06, - "loss": 0.7115, + "epoch": 0.15, + "grad_norm": 1.8158818169452011, + "learning_rate": 9.608605905519166e-06, + "loss": 0.6685, "step": 2156 }, { - "epoch": 0.23, - "grad_norm": 3.9219116603164372, - "learning_rate": 9.017258748423e-06, - "loss": 0.7292, + "epoch": 0.15, + "grad_norm": 1.7003016401593005, + "learning_rate": 9.608160075776546e-06, + "loss": 0.6204, "step": 2157 }, { - "epoch": 0.23, - "grad_norm": 3.1655630933315155, - "learning_rate": 9.01624386301914e-06, - "loss": 0.6373, + "epoch": 0.15, + "grad_norm": 1.8663887794981275, + "learning_rate": 9.607714002615297e-06, + "loss": 0.5477, "step": 2158 }, { - "epoch": 0.23, - "grad_norm": 2.9959938750864805, - "learning_rate": 9.015228511019678e-06, - "loss": 0.6972, + "epoch": 0.15, + "grad_norm": 1.626090381541546, + "learning_rate": 9.607267686058979e-06, + "loss": 0.5924, "step": 2159 }, { - "epoch": 0.23, - "grad_norm": 2.451365705730853, - "learning_rate": 9.014212692542573e-06, - "loss": 0.7195, + "epoch": 0.15, + "grad_norm": 1.5622615076178528, + "learning_rate": 9.606821126131171e-06, + "loss": 0.6126, "step": 2160 }, { - "epoch": 0.23, - "grad_norm": 2.277660307422319, - "learning_rate": 9.013196407705842e-06, - "loss": 0.704, + "epoch": 0.15, + "grad_norm": 1.8288070540766657, + "learning_rate": 9.606374322855463e-06, + "loss": 0.602, "step": 2161 }, { - "epoch": 0.23, - "grad_norm": 3.015544240787718, - "learning_rate": 9.012179656627553e-06, - "loss": 0.6184, + "epoch": 0.15, + "grad_norm": 2.0390320878695722, + "learning_rate": 9.605927276255452e-06, + "loss": 0.5465, "step": 2162 }, { - "epoch": 0.23, - "grad_norm": 2.046162331434754, - "learning_rate": 9.011162439425831e-06, - "loss": 0.7308, + "epoch": 0.15, + "grad_norm": 1.9293573760376699, + "learning_rate": 9.605479986354758e-06, + "loss": 0.5518, "step": 2163 }, { - "epoch": 0.23, - "grad_norm": 2.62859526794707, - "learning_rate": 9.010144756218851e-06, - "loss": 0.6979, + "epoch": 0.15, + "grad_norm": 0.8671985104927161, + "learning_rate": 9.605032453177004e-06, + "loss": 0.4886, "step": 2164 }, { - "epoch": 0.23, - "grad_norm": 2.026397922330441, - "learning_rate": 9.009126607124844e-06, - "loss": 0.6918, + "epoch": 0.15, + "grad_norm": 1.5067775199868225, + "learning_rate": 9.60458467674583e-06, + "loss": 0.5802, "step": 2165 }, { - "epoch": 0.23, - "grad_norm": 2.3621280368628996, - "learning_rate": 9.008107992262098e-06, - "loss": 0.7819, + "epoch": 0.15, + "grad_norm": 1.6642928320115047, + "learning_rate": 9.604136657084894e-06, + "loss": 0.5977, "step": 2166 }, { - "epoch": 0.23, - "grad_norm": 2.176552442201047, - "learning_rate": 9.00708891174895e-06, - "loss": 0.7668, + "epoch": 0.15, + "grad_norm": 2.1621716432547835, + "learning_rate": 9.603688394217858e-06, + "loss": 0.5872, "step": 2167 }, { - "epoch": 0.23, - "grad_norm": 3.2070738864736765, - "learning_rate": 9.006069365703799e-06, - "loss": 0.6924, + "epoch": 0.15, + "grad_norm": 1.6533325789139708, + "learning_rate": 9.6032398881684e-06, + "loss": 0.5935, "step": 2168 }, { - "epoch": 0.23, - "grad_norm": 2.168031678031707, - "learning_rate": 9.005049354245088e-06, - "loss": 0.6743, + "epoch": 0.15, + "grad_norm": 1.717078837266353, + "learning_rate": 9.602791138960215e-06, + "loss": 0.5213, "step": 2169 }, { - "epoch": 0.23, - "grad_norm": 2.8808305867742656, - "learning_rate": 9.004028877491319e-06, - "loss": 0.631, + "epoch": 0.15, + "grad_norm": 3.1649378804584956, + "learning_rate": 9.602342146617005e-06, + "loss": 0.5305, "step": 2170 }, { - "epoch": 0.23, - "grad_norm": 2.295861751174668, - "learning_rate": 9.003007935561052e-06, - "loss": 0.6949, + "epoch": 0.15, + "grad_norm": 2.39592306325254, + "learning_rate": 9.601892911162488e-06, + "loss": 0.4775, "step": 2171 }, { - "epoch": 0.23, - "grad_norm": 2.49760284826448, - "learning_rate": 9.001986528572892e-06, - "loss": 0.743, + "epoch": 0.15, + "grad_norm": 1.882229706455084, + "learning_rate": 9.601443432620394e-06, + "loss": 0.5365, "step": 2172 }, { - "epoch": 0.23, - "grad_norm": 7.296535119642908, - "learning_rate": 9.000964656645508e-06, - "loss": 0.7973, + "epoch": 0.15, + "grad_norm": 1.5975631807307624, + "learning_rate": 9.600993711014466e-06, + "loss": 0.6411, "step": 2173 }, { - "epoch": 0.23, - "grad_norm": 2.2214283799588825, - "learning_rate": 8.999942319897615e-06, - "loss": 0.6888, + "epoch": 0.15, + "grad_norm": 1.8314382805967402, + "learning_rate": 9.60054374636846e-06, + "loss": 0.6603, "step": 2174 }, { - "epoch": 0.23, - "grad_norm": 3.1319580111833254, - "learning_rate": 8.998919518447986e-06, - "loss": 0.6926, + "epoch": 0.15, + "grad_norm": 2.272155485573536, + "learning_rate": 9.600093538706145e-06, + "loss": 0.5211, "step": 2175 }, { - "epoch": 0.23, - "grad_norm": 2.4770875145538556, - "learning_rate": 8.997896252415445e-06, - "loss": 0.7683, + "epoch": 0.15, + "grad_norm": 2.3826410867770718, + "learning_rate": 9.599643088051302e-06, + "loss": 0.5547, "step": 2176 }, { - "epoch": 0.23, - "grad_norm": 2.646693120678769, - "learning_rate": 8.996872521918877e-06, - "loss": 0.8287, + "epoch": 0.15, + "grad_norm": 2.0237113007011134, + "learning_rate": 9.599192394427725e-06, + "loss": 0.5605, "step": 2177 }, { - "epoch": 0.23, - "grad_norm": 2.343228561686917, - "learning_rate": 8.995848327077211e-06, - "loss": 0.7529, + "epoch": 0.15, + "grad_norm": 2.0998989279638667, + "learning_rate": 9.598741457859222e-06, + "loss": 0.5917, "step": 2178 }, { - "epoch": 0.23, - "grad_norm": 5.810342504369392, - "learning_rate": 8.994823668009437e-06, - "loss": 0.7357, + "epoch": 0.15, + "grad_norm": 1.903492470004949, + "learning_rate": 9.598290278369613e-06, + "loss": 0.583, "step": 2179 }, { - "epoch": 0.23, - "grad_norm": 2.1482922949969145, - "learning_rate": 8.9937985448346e-06, - "loss": 0.7304, + "epoch": 0.15, + "grad_norm": 0.9237882261782591, + "learning_rate": 9.597838855982728e-06, + "loss": 0.4996, "step": 2180 }, { - "epoch": 0.23, - "grad_norm": 1.8551204580841676, - "learning_rate": 8.992772957671791e-06, - "loss": 0.7101, + "epoch": 0.15, + "grad_norm": 1.483211513516119, + "learning_rate": 9.597387190722418e-06, + "loss": 0.5168, "step": 2181 }, { - "epoch": 0.23, - "grad_norm": 2.2466577666192475, - "learning_rate": 8.991746906640162e-06, - "loss": 0.6895, + "epoch": 0.15, + "grad_norm": 1.5316054163458794, + "learning_rate": 9.596935282612536e-06, + "loss": 0.5082, "step": 2182 }, { - "epoch": 0.23, - "grad_norm": 2.168650877854395, - "learning_rate": 8.990720391858915e-06, - "loss": 0.6479, + "epoch": 0.15, + "grad_norm": 1.7671303961046758, + "learning_rate": 9.596483131676957e-06, + "loss": 0.6258, "step": 2183 }, { - "epoch": 0.23, - "grad_norm": 3.300343690084461, - "learning_rate": 8.98969341344731e-06, - "loss": 0.6546, + "epoch": 0.15, + "grad_norm": 1.7231237960509258, + "learning_rate": 9.596030737939564e-06, + "loss": 0.5727, "step": 2184 }, { - "epoch": 0.23, - "grad_norm": 2.3637093422065734, - "learning_rate": 8.98866597152466e-06, - "loss": 0.6307, + "epoch": 0.16, + "grad_norm": 1.9428453320018664, + "learning_rate": 9.595578101424254e-06, + "loss": 0.6001, "step": 2185 }, { - "epoch": 0.23, - "grad_norm": 2.754311126650472, - "learning_rate": 8.987638066210325e-06, - "loss": 0.6912, + "epoch": 0.16, + "grad_norm": 1.6315944950998447, + "learning_rate": 9.595125222154935e-06, + "loss": 0.6035, "step": 2186 }, { - "epoch": 0.23, - "grad_norm": 2.1562502830328474, - "learning_rate": 8.986609697623724e-06, - "loss": 0.6785, + "epoch": 0.16, + "grad_norm": 1.5456402381571213, + "learning_rate": 9.594672100155534e-06, + "loss": 0.5526, "step": 2187 }, { - "epoch": 0.23, - "grad_norm": 2.327649052593783, - "learning_rate": 8.985580865884336e-06, - "loss": 0.7294, + "epoch": 0.16, + "grad_norm": 1.6815021837453648, + "learning_rate": 9.594218735449983e-06, + "loss": 0.5707, "step": 2188 }, { - "epoch": 0.23, - "grad_norm": 2.408598312141122, - "learning_rate": 8.984551571111683e-06, - "loss": 0.6296, + "epoch": 0.16, + "grad_norm": 1.5704510439569273, + "learning_rate": 9.59376512806223e-06, + "loss": 0.5388, "step": 2189 }, { - "epoch": 0.23, - "grad_norm": 2.142678163175262, - "learning_rate": 8.983521813425348e-06, - "loss": 0.6944, + "epoch": 0.16, + "grad_norm": 1.5483910381784542, + "learning_rate": 9.593311278016237e-06, + "loss": 0.5954, "step": 2190 }, { - "epoch": 0.23, - "grad_norm": 2.175847871801838, - "learning_rate": 8.982491592944962e-06, - "loss": 0.6382, + "epoch": 0.16, + "grad_norm": 2.6188675468635787, + "learning_rate": 9.592857185335979e-06, + "loss": 0.5892, "step": 2191 }, { - "epoch": 0.23, - "grad_norm": 3.228095121751495, - "learning_rate": 8.981460909790216e-06, - "loss": 0.5974, + "epoch": 0.16, + "grad_norm": 2.8669501752773914, + "learning_rate": 9.592402850045442e-06, + "loss": 0.5394, "step": 2192 }, { - "epoch": 0.23, - "grad_norm": 2.052783456234635, - "learning_rate": 8.98042976408085e-06, - "loss": 0.6731, + "epoch": 0.16, + "grad_norm": 1.572339027803603, + "learning_rate": 9.591948272168625e-06, + "loss": 0.5579, "step": 2193 }, { - "epoch": 0.23, - "grad_norm": 2.0986872293091468, - "learning_rate": 8.97939815593666e-06, - "loss": 0.6891, + "epoch": 0.16, + "grad_norm": 1.5709848061910616, + "learning_rate": 9.59149345172954e-06, + "loss": 0.5596, "step": 2194 }, { - "epoch": 0.23, - "grad_norm": 2.154720097555765, - "learning_rate": 8.978366085477497e-06, - "loss": 0.6185, + "epoch": 0.16, + "grad_norm": 1.8351978432055878, + "learning_rate": 9.591038388752214e-06, + "loss": 0.6708, "step": 2195 }, { - "epoch": 0.23, - "grad_norm": 2.1403969118588067, - "learning_rate": 8.977333552823261e-06, - "loss": 0.6918, + "epoch": 0.16, + "grad_norm": 2.0146901269555304, + "learning_rate": 9.590583083260684e-06, + "loss": 0.6372, "step": 2196 }, { - "epoch": 0.23, - "grad_norm": 3.33214529211307, - "learning_rate": 8.976300558093911e-06, - "loss": 0.6466, + "epoch": 0.16, + "grad_norm": 1.6144171052820337, + "learning_rate": 9.590127535278999e-06, + "loss": 0.5668, "step": 2197 }, { - "epoch": 0.23, - "grad_norm": 4.260333763778915, - "learning_rate": 8.975267101409458e-06, - "loss": 0.7482, + "epoch": 0.16, + "grad_norm": 1.6719333168452695, + "learning_rate": 9.589671744831224e-06, + "loss": 0.5591, "step": 2198 }, { - "epoch": 0.23, - "grad_norm": 2.0914836565162496, - "learning_rate": 8.974233182889961e-06, - "loss": 0.6254, + "epoch": 0.16, + "grad_norm": 1.645997266619098, + "learning_rate": 9.589215711941437e-06, + "loss": 0.5313, "step": 2199 }, { - "epoch": 0.23, - "grad_norm": 2.3762255505538676, - "learning_rate": 8.973198802655543e-06, - "loss": 0.7553, + "epoch": 0.16, + "grad_norm": 1.7735181939874929, + "learning_rate": 9.588759436633724e-06, + "loss": 0.6255, "step": 2200 }, { - "epoch": 0.23, - "grad_norm": 2.1282785961850506, - "learning_rate": 8.972163960826375e-06, - "loss": 0.7332, + "epoch": 0.16, + "grad_norm": 2.044477194970411, + "learning_rate": 9.58830291893219e-06, + "loss": 0.552, "step": 2201 }, { - "epoch": 0.23, - "grad_norm": 1.9694503720386745, - "learning_rate": 8.971128657522677e-06, - "loss": 0.7163, + "epoch": 0.16, + "grad_norm": 2.0588549482838996, + "learning_rate": 9.587846158860949e-06, + "loss": 0.6377, "step": 2202 }, { - "epoch": 0.23, - "grad_norm": 2.1801548709290546, - "learning_rate": 8.970092892864732e-06, - "loss": 0.7532, + "epoch": 0.16, + "grad_norm": 1.8870520211658623, + "learning_rate": 9.587389156444126e-06, + "loss": 0.5344, "step": 2203 }, { - "epoch": 0.23, - "grad_norm": 2.2491318122078092, - "learning_rate": 8.969056666972874e-06, - "loss": 0.716, + "epoch": 0.16, + "grad_norm": 1.7888672922822684, + "learning_rate": 9.586931911705865e-06, + "loss": 0.5301, "step": 2204 }, { - "epoch": 0.23, - "grad_norm": 2.0893548904362413, - "learning_rate": 8.968019979967482e-06, - "loss": 0.6952, + "epoch": 0.16, + "grad_norm": 1.9732692034641155, + "learning_rate": 9.586474424670318e-06, + "loss": 0.5502, "step": 2205 }, { - "epoch": 0.23, - "grad_norm": 2.2759911727787907, - "learning_rate": 8.966982831969001e-06, - "loss": 0.7345, + "epoch": 0.16, + "grad_norm": 1.6773296842675214, + "learning_rate": 9.58601669536165e-06, + "loss": 0.588, "step": 2206 }, { - "epoch": 0.23, - "grad_norm": 3.33173403077009, - "learning_rate": 8.965945223097922e-06, - "loss": 0.7495, + "epoch": 0.16, + "grad_norm": 2.6869645107956144, + "learning_rate": 9.585558723804041e-06, + "loss": 0.6322, "step": 2207 }, { - "epoch": 0.23, - "grad_norm": 1.9341550603493136, - "learning_rate": 8.964907153474791e-06, - "loss": 0.7013, + "epoch": 0.16, + "grad_norm": 2.0759641924705554, + "learning_rate": 9.585100510021683e-06, + "loss": 0.6007, "step": 2208 }, { - "epoch": 0.23, - "grad_norm": 2.1147262266169173, - "learning_rate": 8.963868623220208e-06, - "loss": 0.6709, + "epoch": 0.16, + "grad_norm": 1.5992395210403136, + "learning_rate": 9.584642054038779e-06, + "loss": 0.6028, "step": 2209 }, { - "epoch": 0.23, - "grad_norm": 2.8936564339718123, - "learning_rate": 8.962829632454829e-06, - "loss": 0.7317, + "epoch": 0.16, + "grad_norm": 1.7686854651254926, + "learning_rate": 9.584183355879547e-06, + "loss": 0.6328, "step": 2210 }, { - "epoch": 0.23, - "grad_norm": 2.4903063833013555, - "learning_rate": 8.961790181299354e-06, - "loss": 0.8207, + "epoch": 0.16, + "grad_norm": 3.1315582872671355, + "learning_rate": 9.583724415568216e-06, + "loss": 0.5506, "step": 2211 }, { - "epoch": 0.23, - "grad_norm": 2.2804678844589246, - "learning_rate": 8.960750269874552e-06, - "loss": 0.6467, + "epoch": 0.16, + "grad_norm": 1.8433443319996463, + "learning_rate": 9.58326523312903e-06, + "loss": 0.5845, "step": 2212 }, { - "epoch": 0.23, - "grad_norm": 2.4641706147225713, - "learning_rate": 8.959709898301232e-06, - "loss": 0.7512, + "epoch": 0.16, + "grad_norm": 1.7898512639951558, + "learning_rate": 9.582805808586245e-06, + "loss": 0.5632, "step": 2213 }, { - "epoch": 0.23, - "grad_norm": 2.6089124316964005, - "learning_rate": 8.958669066700261e-06, - "loss": 0.6522, + "epoch": 0.16, + "grad_norm": 0.8855795909154327, + "learning_rate": 9.582346141964127e-06, + "loss": 0.4868, "step": 2214 }, { - "epoch": 0.23, - "grad_norm": 2.19069049516705, - "learning_rate": 8.957627775192564e-06, - "loss": 0.7337, + "epoch": 0.16, + "grad_norm": 1.5522476659849769, + "learning_rate": 9.581886233286959e-06, + "loss": 0.6007, "step": 2215 }, { - "epoch": 0.23, - "grad_norm": 2.500190008138319, - "learning_rate": 8.956586023899109e-06, - "loss": 0.681, + "epoch": 0.16, + "grad_norm": 1.574129461844822, + "learning_rate": 9.581426082579035e-06, + "loss": 0.5643, "step": 2216 }, { - "epoch": 0.23, - "grad_norm": 2.159720426862354, - "learning_rate": 8.95554381294093e-06, - "loss": 0.7571, + "epoch": 0.16, + "grad_norm": 2.60781980532732, + "learning_rate": 9.580965689864662e-06, + "loss": 0.5982, "step": 2217 }, { - "epoch": 0.23, - "grad_norm": 2.0686321610309095, - "learning_rate": 8.954501142439105e-06, - "loss": 0.6577, + "epoch": 0.16, + "grad_norm": 1.8027443807974424, + "learning_rate": 9.580505055168158e-06, + "loss": 0.6132, "step": 2218 }, { - "epoch": 0.23, - "grad_norm": 2.2475898480773995, - "learning_rate": 8.953458012514766e-06, - "loss": 0.7263, + "epoch": 0.16, + "grad_norm": 1.7970061567888873, + "learning_rate": 9.580044178513857e-06, + "loss": 0.6315, "step": 2219 }, { - "epoch": 0.23, - "grad_norm": 3.4884995247883603, - "learning_rate": 8.952414423289107e-06, - "loss": 0.7057, + "epoch": 0.16, + "grad_norm": 1.67459254205551, + "learning_rate": 9.579583059926101e-06, + "loss": 0.6191, "step": 2220 }, { - "epoch": 0.23, - "grad_norm": 4.4448700774798064, - "learning_rate": 8.951370374883362e-06, - "loss": 0.7204, + "epoch": 0.16, + "grad_norm": 1.7009956070806815, + "learning_rate": 9.579121699429252e-06, + "loss": 0.5576, "step": 2221 }, { - "epoch": 0.23, - "grad_norm": 2.441270491294434, - "learning_rate": 8.950325867418831e-06, - "loss": 0.7727, + "epoch": 0.16, + "grad_norm": 1.826134229985322, + "learning_rate": 9.578660097047678e-06, + "loss": 0.5715, "step": 2222 }, { - "epoch": 0.23, - "grad_norm": 2.310391536836991, - "learning_rate": 8.949280901016859e-06, - "loss": 0.7408, + "epoch": 0.16, + "grad_norm": 1.5632386673289096, + "learning_rate": 9.578198252805764e-06, + "loss": 0.5893, "step": 2223 }, { - "epoch": 0.23, - "grad_norm": 1.894271235429747, - "learning_rate": 8.94823547579885e-06, - "loss": 0.6713, + "epoch": 0.16, + "grad_norm": 1.6710227391088774, + "learning_rate": 9.577736166727905e-06, + "loss": 0.5744, "step": 2224 }, { - "epoch": 0.23, - "grad_norm": 2.731116389165696, - "learning_rate": 8.947189591886255e-06, - "loss": 0.7204, + "epoch": 0.16, + "grad_norm": 2.0771503076900384, + "learning_rate": 9.57727383883851e-06, + "loss": 0.5631, "step": 2225 }, { - "epoch": 0.23, - "grad_norm": 2.3805815349526687, - "learning_rate": 8.946143249400582e-06, - "loss": 0.6696, + "epoch": 0.16, + "grad_norm": 1.62091495029376, + "learning_rate": 9.576811269162e-06, + "loss": 0.6058, "step": 2226 }, { - "epoch": 0.23, - "grad_norm": 2.354183378868526, - "learning_rate": 8.945096448463397e-06, - "loss": 0.7481, + "epoch": 0.16, + "grad_norm": 3.3448128979292253, + "learning_rate": 9.576348457722811e-06, + "loss": 0.6018, "step": 2227 }, { - "epoch": 0.23, - "grad_norm": 2.189115002118964, - "learning_rate": 8.944049189196308e-06, - "loss": 0.7049, + "epoch": 0.16, + "grad_norm": 1.8967589605265998, + "learning_rate": 9.57588540454539e-06, + "loss": 0.6134, "step": 2228 }, { - "epoch": 0.23, - "grad_norm": 15.927462117356988, - "learning_rate": 8.943001471720987e-06, - "loss": 0.63, + "epoch": 0.16, + "grad_norm": 2.0537029785752834, + "learning_rate": 9.575422109654195e-06, + "loss": 0.6005, "step": 2229 }, { - "epoch": 0.23, - "grad_norm": 7.588326102390013, - "learning_rate": 8.941953296159153e-06, - "loss": 0.7003, + "epoch": 0.16, + "grad_norm": 1.911467837414719, + "learning_rate": 9.574958573073702e-06, + "loss": 0.6733, "step": 2230 }, { - "epoch": 0.23, - "grad_norm": 3.0744521000216753, - "learning_rate": 8.940904662632579e-06, - "loss": 0.7521, + "epoch": 0.16, + "grad_norm": 1.5095570770846123, + "learning_rate": 9.574494794828396e-06, + "loss": 0.5503, "step": 2231 }, { - "epoch": 0.23, - "grad_norm": 3.4628646582770877, - "learning_rate": 8.939855571263095e-06, - "loss": 0.6593, + "epoch": 0.16, + "grad_norm": 1.760778089890679, + "learning_rate": 9.574030774942773e-06, + "loss": 0.6581, "step": 2232 }, { - "epoch": 0.23, - "grad_norm": 7.273569477502985, - "learning_rate": 8.938806022172578e-06, - "loss": 0.6958, + "epoch": 0.16, + "grad_norm": 1.5223044814977023, + "learning_rate": 9.573566513441347e-06, + "loss": 0.5911, "step": 2233 }, { - "epoch": 0.24, - "grad_norm": 2.3321350896927435, - "learning_rate": 8.937756015482962e-06, - "loss": 0.706, + "epoch": 0.16, + "grad_norm": 0.8778431511731889, + "learning_rate": 9.573102010348639e-06, + "loss": 0.4694, "step": 2234 }, { - "epoch": 0.24, - "grad_norm": 1.249108717782656, - "learning_rate": 8.936705551316238e-06, - "loss": 0.5939, + "epoch": 0.16, + "grad_norm": 1.8317574087663744, + "learning_rate": 9.572637265689187e-06, + "loss": 0.577, "step": 2235 }, { - "epoch": 0.24, - "grad_norm": 2.4287842458282207, - "learning_rate": 8.935654629794442e-06, - "loss": 0.7158, + "epoch": 0.16, + "grad_norm": 1.8848236974022117, + "learning_rate": 9.57217227948754e-06, + "loss": 0.5466, "step": 2236 }, { - "epoch": 0.24, - "grad_norm": 2.7772840916896127, - "learning_rate": 8.934603251039667e-06, - "loss": 0.6861, + "epoch": 0.16, + "grad_norm": 1.741569178705166, + "learning_rate": 9.571707051768263e-06, + "loss": 0.5701, "step": 2237 }, { - "epoch": 0.24, - "grad_norm": 2.350481813294236, - "learning_rate": 8.93355141517406e-06, - "loss": 0.7134, + "epoch": 0.16, + "grad_norm": 1.584629981791469, + "learning_rate": 9.571241582555925e-06, + "loss": 0.5997, "step": 2238 }, { - "epoch": 0.24, - "grad_norm": 1.9394185187015496, - "learning_rate": 8.932499122319821e-06, - "loss": 0.6629, + "epoch": 0.16, + "grad_norm": 1.7385750394422443, + "learning_rate": 9.570775871875119e-06, + "loss": 0.5328, "step": 2239 }, { - "epoch": 0.24, - "grad_norm": 2.664123857241033, - "learning_rate": 8.931446372599202e-06, - "loss": 0.6152, + "epoch": 0.16, + "grad_norm": 1.6121997739363734, + "learning_rate": 9.570309919750445e-06, + "loss": 0.6166, "step": 2240 }, { - "epoch": 0.24, - "grad_norm": 2.349915878479327, - "learning_rate": 8.930393166134507e-06, - "loss": 0.6989, + "epoch": 0.16, + "grad_norm": 1.0325775857779589, + "learning_rate": 9.569843726206513e-06, + "loss": 0.4647, "step": 2241 }, { - "epoch": 0.24, - "grad_norm": 2.206930105187941, - "learning_rate": 8.929339503048096e-06, - "loss": 0.6799, + "epoch": 0.16, + "grad_norm": 2.061368481924826, + "learning_rate": 9.569377291267951e-06, + "loss": 0.5906, "step": 2242 }, { - "epoch": 0.24, - "grad_norm": 2.647423217564447, - "learning_rate": 8.92828538346238e-06, - "loss": 0.7513, + "epoch": 0.16, + "grad_norm": 1.8702553276692877, + "learning_rate": 9.568910614959398e-06, + "loss": 0.54, "step": 2243 }, { - "epoch": 0.24, - "grad_norm": 2.4210465633816582, - "learning_rate": 8.927230807499824e-06, - "loss": 0.6743, + "epoch": 0.16, + "grad_norm": 2.6898081856322613, + "learning_rate": 9.568443697305502e-06, + "loss": 0.5316, "step": 2244 }, { - "epoch": 0.24, - "grad_norm": 2.5877919600311934, - "learning_rate": 8.926175775282946e-06, - "loss": 0.6994, + "epoch": 0.16, + "grad_norm": 0.7718021248675229, + "learning_rate": 9.567976538330932e-06, + "loss": 0.4562, "step": 2245 }, { - "epoch": 0.24, - "grad_norm": 2.158209740707754, - "learning_rate": 8.925120286934315e-06, - "loss": 0.6448, + "epoch": 0.16, + "grad_norm": 1.6905313761064888, + "learning_rate": 9.567509138060362e-06, + "loss": 0.5832, "step": 2246 }, { - "epoch": 0.24, - "grad_norm": 2.3832699071525774, - "learning_rate": 8.924064342576554e-06, - "loss": 0.6684, + "epoch": 0.16, + "grad_norm": 1.7547169716128395, + "learning_rate": 9.567041496518485e-06, + "loss": 0.5664, "step": 2247 }, { - "epoch": 0.24, - "grad_norm": 2.400928597949794, - "learning_rate": 8.923007942332345e-06, - "loss": 0.73, + "epoch": 0.16, + "grad_norm": 1.4795891134233017, + "learning_rate": 9.566573613729997e-06, + "loss": 0.5177, "step": 2248 }, { - "epoch": 0.24, - "grad_norm": 2.9437835667880945, - "learning_rate": 8.92195108632441e-06, - "loss": 0.7273, + "epoch": 0.16, + "grad_norm": 1.8949248441473716, + "learning_rate": 9.56610548971962e-06, + "loss": 0.6199, "step": 2249 }, { - "epoch": 0.24, - "grad_norm": 4.870436670961615, - "learning_rate": 8.920893774675536e-06, - "loss": 0.705, + "epoch": 0.16, + "grad_norm": 1.926553837860844, + "learning_rate": 9.565637124512078e-06, + "loss": 0.5275, "step": 2250 }, { - "epoch": 0.24, - "grad_norm": 3.3747630286954555, - "learning_rate": 8.919836007508558e-06, - "loss": 0.7267, + "epoch": 0.16, + "grad_norm": 1.7764817792214893, + "learning_rate": 9.565168518132112e-06, + "loss": 0.5605, "step": 2251 }, { - "epoch": 0.24, - "grad_norm": 3.341185883155674, - "learning_rate": 8.918777784946364e-06, - "loss": 0.712, + "epoch": 0.16, + "grad_norm": 1.835532596312182, + "learning_rate": 9.564699670604477e-06, + "loss": 0.5921, "step": 2252 }, { - "epoch": 0.24, - "grad_norm": 2.0954214888627747, - "learning_rate": 8.917719107111893e-06, - "loss": 0.6333, + "epoch": 0.16, + "grad_norm": 0.8417538997575917, + "learning_rate": 9.564230581953938e-06, + "loss": 0.4619, "step": 2253 }, { - "epoch": 0.24, - "grad_norm": 1.2833413585564724, - "learning_rate": 8.916659974128144e-06, - "loss": 0.6779, + "epoch": 0.16, + "grad_norm": 7.081020260740657, + "learning_rate": 9.563761252205274e-06, + "loss": 0.5895, "step": 2254 }, { - "epoch": 0.24, - "grad_norm": 2.1835249116285995, - "learning_rate": 8.91560038611816e-06, - "loss": 0.705, + "epoch": 0.16, + "grad_norm": 1.5878941432343703, + "learning_rate": 9.563291681383277e-06, + "loss": 0.5776, "step": 2255 }, { - "epoch": 0.24, - "grad_norm": 7.568232696610119, - "learning_rate": 8.91454034320504e-06, - "loss": 0.6866, + "epoch": 0.16, + "grad_norm": 0.9397587592195954, + "learning_rate": 9.56282186951275e-06, + "loss": 0.4777, "step": 2256 }, { - "epoch": 0.24, - "grad_norm": 2.7059228367154904, - "learning_rate": 8.913479845511942e-06, - "loss": 0.6874, + "epoch": 0.16, + "grad_norm": 1.6050895731983348, + "learning_rate": 9.562351816618511e-06, + "loss": 0.5867, "step": 2257 }, { - "epoch": 0.24, - "grad_norm": 2.1413067070108274, - "learning_rate": 8.912418893162066e-06, - "loss": 0.6511, + "epoch": 0.16, + "grad_norm": 1.758575022596704, + "learning_rate": 9.56188152272539e-06, + "loss": 0.5851, "step": 2258 }, { - "epoch": 0.24, - "grad_norm": 2.553360298084785, - "learning_rate": 8.91135748627867e-06, - "loss": 0.6896, + "epoch": 0.16, + "grad_norm": 2.045975616697389, + "learning_rate": 9.561410987858228e-06, + "loss": 0.5362, "step": 2259 }, { - "epoch": 0.24, - "grad_norm": 2.5267231876409033, - "learning_rate": 8.910295624985072e-06, - "loss": 0.7684, + "epoch": 0.16, + "grad_norm": 1.6928466069758905, + "learning_rate": 9.560940212041882e-06, + "loss": 0.6096, "step": 2260 }, { - "epoch": 0.24, - "grad_norm": 2.267588533534899, - "learning_rate": 8.909233309404632e-06, - "loss": 0.7048, + "epoch": 0.16, + "grad_norm": 1.525379030457798, + "learning_rate": 9.56046919530122e-06, + "loss": 0.5367, "step": 2261 }, { - "epoch": 0.24, - "grad_norm": 6.118523741223378, - "learning_rate": 8.908170539660766e-06, - "loss": 0.7692, + "epoch": 0.16, + "grad_norm": 1.8462895350414437, + "learning_rate": 9.559997937661122e-06, + "loss": 0.6299, "step": 2262 }, { - "epoch": 0.24, - "grad_norm": 3.7350137185652335, - "learning_rate": 8.907107315876942e-06, - "loss": 0.5702, + "epoch": 0.16, + "grad_norm": 1.7648216144064959, + "learning_rate": 9.559526439146481e-06, + "loss": 0.5425, "step": 2263 }, { - "epoch": 0.24, - "grad_norm": 1.2334438560399243, - "learning_rate": 8.906043638176686e-06, - "loss": 0.6064, + "epoch": 0.16, + "grad_norm": 1.497447950527088, + "learning_rate": 9.559054699782204e-06, + "loss": 0.5718, "step": 2264 }, { - "epoch": 0.24, - "grad_norm": 2.408881941929093, - "learning_rate": 8.90497950668357e-06, - "loss": 0.7069, + "epoch": 0.16, + "grad_norm": 1.6003208778877256, + "learning_rate": 9.558582719593211e-06, + "loss": 0.6111, "step": 2265 }, { - "epoch": 0.24, - "grad_norm": 0.985892002965994, - "learning_rate": 8.903914921521226e-06, - "loss": 0.6155, + "epoch": 0.16, + "grad_norm": 1.6645270149575964, + "learning_rate": 9.55811049860443e-06, + "loss": 0.4911, "step": 2266 }, { - "epoch": 0.24, - "grad_norm": 3.0808888218270587, - "learning_rate": 8.90284988281333e-06, - "loss": 0.6486, + "epoch": 0.16, + "grad_norm": 2.1706601368882144, + "learning_rate": 9.557638036840809e-06, + "loss": 0.5559, "step": 2267 }, { - "epoch": 0.24, - "grad_norm": 2.235696390043599, - "learning_rate": 8.901784390683616e-06, - "loss": 0.7458, + "epoch": 0.16, + "grad_norm": 0.8455681766874805, + "learning_rate": 9.5571653343273e-06, + "loss": 0.4637, "step": 2268 }, { - "epoch": 0.24, - "grad_norm": 2.545711880315011, - "learning_rate": 8.90071844525587e-06, - "loss": 0.6872, + "epoch": 0.16, + "grad_norm": 1.7523197952716691, + "learning_rate": 9.55669239108888e-06, + "loss": 0.5427, "step": 2269 }, { - "epoch": 0.24, - "grad_norm": 3.1494684836406015, - "learning_rate": 8.89965204665393e-06, - "loss": 0.6339, + "epoch": 0.16, + "grad_norm": 2.8201679885765936, + "learning_rate": 9.556219207150527e-06, + "loss": 0.5048, "step": 2270 }, { - "epoch": 0.24, - "grad_norm": 1.2914981848806717, - "learning_rate": 8.898585195001691e-06, - "loss": 0.6513, + "epoch": 0.16, + "grad_norm": 2.803540149610321, + "learning_rate": 9.555745782537238e-06, + "loss": 0.5977, "step": 2271 }, { - "epoch": 0.24, - "grad_norm": 2.6252182724430613, - "learning_rate": 8.897517890423092e-06, - "loss": 0.6416, + "epoch": 0.16, + "grad_norm": 2.26316228558787, + "learning_rate": 9.555272117274017e-06, + "loss": 0.6059, "step": 2272 }, { - "epoch": 0.24, - "grad_norm": 2.3145266443168397, - "learning_rate": 8.896450133042132e-06, - "loss": 0.5625, + "epoch": 0.16, + "grad_norm": 1.6768609202073599, + "learning_rate": 9.55479821138589e-06, + "loss": 0.5191, "step": 2273 }, { - "epoch": 0.24, - "grad_norm": 2.9425490217808346, - "learning_rate": 8.895381922982857e-06, - "loss": 0.7486, + "epoch": 0.16, + "grad_norm": 1.9711481634640542, + "learning_rate": 9.554324064897886e-06, + "loss": 0.6052, "step": 2274 }, { - "epoch": 0.24, - "grad_norm": 2.4409922366780656, - "learning_rate": 8.894313260369372e-06, - "loss": 0.7289, + "epoch": 0.16, + "grad_norm": 1.5738268900227714, + "learning_rate": 9.553849677835054e-06, + "loss": 0.5575, "step": 2275 }, { - "epoch": 0.24, - "grad_norm": 2.7296566582591337, - "learning_rate": 8.89324414532583e-06, - "loss": 0.652, + "epoch": 0.16, + "grad_norm": 1.7436015310932238, + "learning_rate": 9.553375050222451e-06, + "loss": 0.523, "step": 2276 }, { - "epoch": 0.24, - "grad_norm": 2.7073730009269488, - "learning_rate": 8.892174577976438e-06, - "loss": 0.6029, + "epoch": 0.16, + "grad_norm": 1.8681704949767293, + "learning_rate": 9.552900182085148e-06, + "loss": 0.6432, "step": 2277 }, { - "epoch": 0.24, - "grad_norm": 2.984224980813416, - "learning_rate": 8.891104558445454e-06, - "loss": 0.6809, + "epoch": 0.16, + "grad_norm": 1.9046891759528701, + "learning_rate": 9.552425073448231e-06, + "loss": 0.5633, "step": 2278 }, { - "epoch": 0.24, - "grad_norm": 2.5742027243121153, - "learning_rate": 8.890034086857189e-06, - "loss": 0.6906, + "epoch": 0.16, + "grad_norm": 7.567046814384813, + "learning_rate": 9.551949724336796e-06, + "loss": 0.5314, "step": 2279 }, { - "epoch": 0.24, - "grad_norm": 2.0483532696980427, - "learning_rate": 8.88896316333601e-06, - "loss": 0.7207, + "epoch": 0.16, + "grad_norm": 0.8532380853835552, + "learning_rate": 9.551474134775951e-06, + "loss": 0.4945, "step": 2280 }, { - "epoch": 0.24, - "grad_norm": 2.7333128521952657, - "learning_rate": 8.887891788006334e-06, - "loss": 0.7079, + "epoch": 0.16, + "grad_norm": 1.6764141906563923, + "learning_rate": 9.550998304790822e-06, + "loss": 0.6226, "step": 2281 }, { - "epoch": 0.24, - "grad_norm": 2.860179858149371, - "learning_rate": 8.886819960992626e-06, - "loss": 0.6179, + "epoch": 0.16, + "grad_norm": 1.9309657957773585, + "learning_rate": 9.550522234406539e-06, + "loss": 0.5896, "step": 2282 }, { - "epoch": 0.24, - "grad_norm": 2.398502797457853, - "learning_rate": 8.885747682419413e-06, - "loss": 0.6413, + "epoch": 0.16, + "grad_norm": 1.6499625908058015, + "learning_rate": 9.550045923648255e-06, + "loss": 0.5864, "step": 2283 }, { - "epoch": 0.24, - "grad_norm": 2.3814307011526807, - "learning_rate": 8.884674952411265e-06, - "loss": 0.815, + "epoch": 0.16, + "grad_norm": 1.8110072429982764, + "learning_rate": 9.549569372541126e-06, + "loss": 0.566, "step": 2284 }, { - "epoch": 0.24, - "grad_norm": 2.91263925062964, - "learning_rate": 8.883601771092812e-06, - "loss": 0.6907, + "epoch": 0.16, + "grad_norm": 0.9582411152493978, + "learning_rate": 9.549092581110326e-06, + "loss": 0.4819, "step": 2285 }, { - "epoch": 0.24, - "grad_norm": 2.470495388049324, - "learning_rate": 8.882528138588729e-06, - "loss": 0.7293, + "epoch": 0.16, + "grad_norm": 1.8450713253068163, + "learning_rate": 9.548615549381044e-06, + "loss": 0.6111, "step": 2286 }, { - "epoch": 0.24, - "grad_norm": 2.206151479410179, - "learning_rate": 8.881454055023752e-06, - "loss": 0.7459, + "epoch": 0.16, + "grad_norm": 1.444549650358243, + "learning_rate": 9.548138277378474e-06, + "loss": 0.5114, "step": 2287 }, { - "epoch": 0.24, - "grad_norm": 2.432016434550301, - "learning_rate": 8.880379520522664e-06, - "loss": 0.762, + "epoch": 0.16, + "grad_norm": 3.5153317888157827, + "learning_rate": 9.547660765127828e-06, + "loss": 0.5574, "step": 2288 }, { - "epoch": 0.24, - "grad_norm": 2.8140952416127694, - "learning_rate": 8.879304535210298e-06, - "loss": 0.6734, + "epoch": 0.16, + "grad_norm": 2.066359722809632, + "learning_rate": 9.547183012654333e-06, + "loss": 0.5588, "step": 2289 }, { - "epoch": 0.24, - "grad_norm": 2.473443750742672, - "learning_rate": 8.878229099211548e-06, - "loss": 0.75, + "epoch": 0.16, + "grad_norm": 1.6305623091568135, + "learning_rate": 9.546705019983222e-06, + "loss": 0.6113, "step": 2290 }, { - "epoch": 0.24, - "grad_norm": 4.536037475518659, - "learning_rate": 8.87715321265135e-06, - "loss": 0.6536, + "epoch": 0.16, + "grad_norm": 1.5801868377912687, + "learning_rate": 9.546226787139747e-06, + "loss": 0.5991, "step": 2291 }, { - "epoch": 0.24, - "grad_norm": 2.268832339589105, - "learning_rate": 8.8760768756547e-06, - "loss": 0.7971, + "epoch": 0.16, + "grad_norm": 1.6969414209449643, + "learning_rate": 9.545748314149167e-06, + "loss": 0.5764, "step": 2292 }, { - "epoch": 0.24, - "grad_norm": 2.784421921853551, - "learning_rate": 8.875000088346642e-06, - "loss": 0.7665, + "epoch": 0.16, + "grad_norm": 1.7697694827073864, + "learning_rate": 9.545269601036758e-06, + "loss": 0.6768, "step": 2293 }, { - "epoch": 0.24, - "grad_norm": 2.494207647604574, - "learning_rate": 8.873922850852276e-06, - "loss": 0.5876, + "epoch": 0.16, + "grad_norm": 0.9425085773043032, + "learning_rate": 9.544790647827808e-06, + "loss": 0.4692, "step": 2294 }, { - "epoch": 0.24, - "grad_norm": 4.963714518286977, - "learning_rate": 8.872845163296752e-06, - "loss": 0.7046, + "epoch": 0.16, + "grad_norm": 1.8764799195706343, + "learning_rate": 9.544311454547615e-06, + "loss": 0.6233, "step": 2295 }, { - "epoch": 0.24, - "grad_norm": 3.1676492228416104, - "learning_rate": 8.87176702580527e-06, - "loss": 0.6656, + "epoch": 0.16, + "grad_norm": 0.8325025945740591, + "learning_rate": 9.543832021221495e-06, + "loss": 0.4423, "step": 2296 }, { - "epoch": 0.24, - "grad_norm": 2.5119192830739734, - "learning_rate": 8.87068843850309e-06, - "loss": 0.7083, + "epoch": 0.16, + "grad_norm": 2.3341359308923195, + "learning_rate": 9.543352347874767e-06, + "loss": 0.5281, "step": 2297 }, { - "epoch": 0.24, - "grad_norm": 2.943714911949743, - "learning_rate": 8.869609401515516e-06, - "loss": 0.6959, + "epoch": 0.16, + "grad_norm": 1.968979517165017, + "learning_rate": 9.542872434532777e-06, + "loss": 0.5854, "step": 2298 }, { - "epoch": 0.24, - "grad_norm": 2.6887677900783884, - "learning_rate": 8.868529914967908e-06, - "loss": 0.6394, + "epoch": 0.16, + "grad_norm": 1.5007280234917677, + "learning_rate": 9.54239228122087e-06, + "loss": 0.5436, "step": 2299 }, { - "epoch": 0.24, - "grad_norm": 2.2402749666716884, - "learning_rate": 8.867449978985676e-06, - "loss": 0.7139, + "epoch": 0.16, + "grad_norm": 1.7661531266002837, + "learning_rate": 9.541911887964411e-06, + "loss": 0.5222, "step": 2300 }, { - "epoch": 0.24, - "grad_norm": 2.8829424956949716, - "learning_rate": 8.866369593694285e-06, - "loss": 0.6697, + "epoch": 0.16, + "grad_norm": 1.8600673906472942, + "learning_rate": 9.541431254788777e-06, + "loss": 0.6109, "step": 2301 }, { - "epoch": 0.24, - "grad_norm": 2.746440356252218, - "learning_rate": 8.865288759219251e-06, - "loss": 0.7202, + "epoch": 0.16, + "grad_norm": 2.3995035159209324, + "learning_rate": 9.540950381719354e-06, + "loss": 0.5085, "step": 2302 }, { - "epoch": 0.24, - "grad_norm": 3.1527046705438075, - "learning_rate": 8.864207475686142e-06, - "loss": 0.724, + "epoch": 0.16, + "grad_norm": 1.6448540480073275, + "learning_rate": 9.540469268781547e-06, + "loss": 0.5431, "step": 2303 }, { - "epoch": 0.24, - "grad_norm": 2.95970505604412, - "learning_rate": 8.86312574322058e-06, - "loss": 0.7037, + "epoch": 0.16, + "grad_norm": 2.0339863132545495, + "learning_rate": 9.539987916000766e-06, + "loss": 0.5451, "step": 2304 }, { - "epoch": 0.24, - "grad_norm": 2.271026223729123, - "learning_rate": 8.862043561948237e-06, - "loss": 0.6285, + "epoch": 0.16, + "grad_norm": 0.9169628881652333, + "learning_rate": 9.53950632340244e-06, + "loss": 0.4731, "step": 2305 }, { - "epoch": 0.24, - "grad_norm": 1.8402186327283752, - "learning_rate": 8.860960931994835e-06, - "loss": 0.6876, + "epoch": 0.16, + "grad_norm": 2.1133686503171796, + "learning_rate": 9.539024491012008e-06, + "loss": 0.579, "step": 2306 }, { - "epoch": 0.24, - "grad_norm": 3.098504551725227, - "learning_rate": 8.859877853486154e-06, - "loss": 0.7273, + "epoch": 0.16, + "grad_norm": 1.8118977628197253, + "learning_rate": 9.538542418854923e-06, + "loss": 0.5723, "step": 2307 }, { - "epoch": 0.24, - "grad_norm": 2.2831168072415884, - "learning_rate": 8.85879432654802e-06, - "loss": 0.7357, + "epoch": 0.16, + "grad_norm": 2.2040538902171942, + "learning_rate": 9.538060106956648e-06, + "loss": 0.5228, "step": 2308 }, { - "epoch": 0.24, - "grad_norm": 3.372416894210338, - "learning_rate": 8.85771035130632e-06, - "loss": 0.6096, + "epoch": 0.16, + "grad_norm": 3.117042140835161, + "learning_rate": 9.537577555342661e-06, + "loss": 0.5867, "step": 2309 }, { - "epoch": 0.24, - "grad_norm": 7.296320517752463, - "learning_rate": 8.85662592788698e-06, - "loss": 0.6875, + "epoch": 0.16, + "grad_norm": 2.1791507166901103, + "learning_rate": 9.537094764038454e-06, + "loss": 0.543, "step": 2310 }, { - "epoch": 0.24, - "grad_norm": 3.8593869238201894, - "learning_rate": 8.855541056415988e-06, - "loss": 0.7534, + "epoch": 0.16, + "grad_norm": 1.7105894647637536, + "learning_rate": 9.536611733069526e-06, + "loss": 0.6596, "step": 2311 }, { - "epoch": 0.24, - "grad_norm": 2.688210282987243, - "learning_rate": 8.854455737019381e-06, - "loss": 0.7085, + "epoch": 0.16, + "grad_norm": 1.6561209510916075, + "learning_rate": 9.536128462461393e-06, + "loss": 0.5279, "step": 2312 }, { - "epoch": 0.24, - "grad_norm": 2.2398910749922645, - "learning_rate": 8.853369969823249e-06, - "loss": 0.7067, + "epoch": 0.16, + "grad_norm": 1.6024225241637267, + "learning_rate": 9.535644952239587e-06, + "loss": 0.5698, "step": 2313 }, { - "epoch": 0.24, - "grad_norm": 2.5141437787544123, - "learning_rate": 8.852283754953734e-06, - "loss": 0.6465, + "epoch": 0.16, + "grad_norm": 1.8655417953022526, + "learning_rate": 9.535161202429644e-06, + "loss": 0.608, "step": 2314 }, { - "epoch": 0.24, - "grad_norm": 3.3203384815403836, - "learning_rate": 8.851197092537027e-06, - "loss": 0.6464, + "epoch": 0.16, + "grad_norm": 1.6109186024221125, + "learning_rate": 9.53467721305712e-06, + "loss": 0.5862, "step": 2315 }, { - "epoch": 0.24, - "grad_norm": 4.071240596546394, - "learning_rate": 8.850109982699375e-06, - "loss": 0.7057, + "epoch": 0.16, + "grad_norm": 2.0421673838541654, + "learning_rate": 9.534192984147579e-06, + "loss": 0.659, "step": 2316 }, { - "epoch": 0.24, - "grad_norm": 5.27586745399632, - "learning_rate": 8.849022425567074e-06, - "loss": 0.5833, + "epoch": 0.16, + "grad_norm": 2.2089364039580737, + "learning_rate": 9.533708515726601e-06, + "loss": 0.6131, "step": 2317 }, { - "epoch": 0.24, - "grad_norm": 2.242964035717761, - "learning_rate": 8.847934421266475e-06, - "loss": 0.6137, + "epoch": 0.16, + "grad_norm": 1.8999276601250203, + "learning_rate": 9.533223807819777e-06, + "loss": 0.5364, "step": 2318 }, { - "epoch": 0.24, - "grad_norm": 3.005217367036941, - "learning_rate": 8.846845969923977e-06, - "loss": 0.6083, + "epoch": 0.16, + "grad_norm": 1.6010216285103114, + "learning_rate": 9.53273886045271e-06, + "loss": 0.5575, "step": 2319 }, { - "epoch": 0.24, - "grad_norm": 2.9145747544167695, - "learning_rate": 8.845757071666035e-06, - "loss": 0.6655, + "epoch": 0.16, + "grad_norm": 1.8098938675614238, + "learning_rate": 9.532253673651019e-06, + "loss": 0.5323, "step": 2320 }, { - "epoch": 0.24, - "grad_norm": 3.0295015098864075, - "learning_rate": 8.844667726619153e-06, - "loss": 0.8086, + "epoch": 0.16, + "grad_norm": 1.8270853260375381, + "learning_rate": 9.531768247440331e-06, + "loss": 0.5857, "step": 2321 }, { - "epoch": 0.24, - "grad_norm": 2.2677233953465716, - "learning_rate": 8.843577934909888e-06, - "loss": 0.7134, + "epoch": 0.16, + "grad_norm": 1.8707815379764978, + "learning_rate": 9.531282581846288e-06, + "loss": 0.5775, "step": 2322 }, { - "epoch": 0.24, - "grad_norm": 3.0387979236232696, - "learning_rate": 8.84248769666485e-06, - "loss": 0.6452, + "epoch": 0.16, + "grad_norm": 1.6018258728666597, + "learning_rate": 9.530796676894544e-06, + "loss": 0.5362, "step": 2323 }, { - "epoch": 0.24, - "grad_norm": 2.3092678024403184, - "learning_rate": 8.8413970120107e-06, - "loss": 0.6905, + "epoch": 0.16, + "grad_norm": 1.702538687916517, + "learning_rate": 9.53031053261077e-06, + "loss": 0.6099, "step": 2324 }, { - "epoch": 0.24, - "grad_norm": 2.441938437783016, - "learning_rate": 8.840305881074147e-06, - "loss": 0.7386, + "epoch": 0.16, + "grad_norm": 2.064452673716773, + "learning_rate": 9.52982414902064e-06, + "loss": 0.6436, "step": 2325 }, { - "epoch": 0.24, - "grad_norm": 2.299488064549423, - "learning_rate": 8.83921430398196e-06, - "loss": 0.5852, + "epoch": 0.17, + "grad_norm": 1.5300102986508581, + "learning_rate": 9.529337526149851e-06, + "loss": 0.5908, "step": 2326 }, { - "epoch": 0.24, - "grad_norm": 2.346680432521479, - "learning_rate": 8.838122280860953e-06, - "loss": 0.6816, + "epoch": 0.17, + "grad_norm": 1.1000618819355426, + "learning_rate": 9.528850664024106e-06, + "loss": 0.4588, "step": 2327 }, { - "epoch": 0.24, - "grad_norm": 1.1971116175516738, - "learning_rate": 8.837029811837991e-06, - "loss": 0.6475, + "epoch": 0.17, + "grad_norm": 1.7690080936288721, + "learning_rate": 9.528363562669122e-06, + "loss": 0.6264, "step": 2328 }, { - "epoch": 0.25, - "grad_norm": 3.033820645161564, - "learning_rate": 8.83593689704e-06, - "loss": 0.6569, + "epoch": 0.17, + "grad_norm": 1.8782182668667409, + "learning_rate": 9.52787622211063e-06, + "loss": 0.5555, "step": 2329 }, { - "epoch": 0.25, - "grad_norm": 2.2373130942751507, - "learning_rate": 8.834843536593949e-06, - "loss": 0.7147, + "epoch": 0.17, + "grad_norm": 1.923074786664883, + "learning_rate": 9.527388642374375e-06, + "loss": 0.5828, "step": 2330 }, { - "epoch": 0.25, - "grad_norm": 2.462343548899127, - "learning_rate": 8.833749730626862e-06, - "loss": 0.6969, + "epoch": 0.17, + "grad_norm": 2.092065847314824, + "learning_rate": 9.526900823486111e-06, + "loss": 0.5224, "step": 2331 }, { - "epoch": 0.25, - "grad_norm": 3.597236651843794, - "learning_rate": 8.832655479265812e-06, - "loss": 0.6882, + "epoch": 0.17, + "grad_norm": 1.777785016164713, + "learning_rate": 9.526412765471606e-06, + "loss": 0.6682, "step": 2332 }, { - "epoch": 0.25, - "grad_norm": 1.2374268550953933, - "learning_rate": 8.831560782637929e-06, - "loss": 0.562, + "epoch": 0.17, + "grad_norm": 1.5731246622386497, + "learning_rate": 9.525924468356641e-06, + "loss": 0.5701, "step": 2333 }, { - "epoch": 0.25, - "grad_norm": 2.383491282270233, - "learning_rate": 8.830465640870388e-06, - "loss": 0.7145, + "epoch": 0.17, + "grad_norm": 1.764844363487853, + "learning_rate": 9.52543593216701e-06, + "loss": 0.5693, "step": 2334 }, { - "epoch": 0.25, - "grad_norm": 2.1925762438333, - "learning_rate": 8.829370054090423e-06, - "loss": 0.6832, + "epoch": 0.17, + "grad_norm": 1.697014126864203, + "learning_rate": 9.52494715692852e-06, + "loss": 0.5263, "step": 2335 }, { - "epoch": 0.25, - "grad_norm": 3.41126511819354, - "learning_rate": 8.828274022425316e-06, - "loss": 0.7105, + "epoch": 0.17, + "grad_norm": 1.9061245978370185, + "learning_rate": 9.524458142666986e-06, + "loss": 0.5092, "step": 2336 }, { - "epoch": 0.25, - "grad_norm": 2.863081103637933, - "learning_rate": 8.827177546002398e-06, - "loss": 0.7167, + "epoch": 0.17, + "grad_norm": 1.8106454151111813, + "learning_rate": 9.523968889408244e-06, + "loss": 0.6139, "step": 2337 }, { - "epoch": 0.25, - "grad_norm": 2.3220167881162324, - "learning_rate": 8.826080624949056e-06, - "loss": 0.6032, + "epoch": 0.17, + "grad_norm": 2.2130575842688014, + "learning_rate": 9.523479397178135e-06, + "loss": 0.4778, "step": 2338 }, { - "epoch": 0.25, - "grad_norm": 2.4710434596501796, - "learning_rate": 8.824983259392727e-06, - "loss": 0.6848, + "epoch": 0.17, + "grad_norm": 1.609642801926249, + "learning_rate": 9.522989666002516e-06, + "loss": 0.5713, "step": 2339 }, { - "epoch": 0.25, - "grad_norm": 2.0728765590634004, - "learning_rate": 8.823885449460899e-06, - "loss": 0.6752, + "epoch": 0.17, + "grad_norm": 1.7784604859018336, + "learning_rate": 9.522499695907256e-06, + "loss": 0.5977, "step": 2340 }, { - "epoch": 0.25, - "grad_norm": 2.173323985472954, - "learning_rate": 8.822787195281114e-06, - "loss": 0.6507, + "epoch": 0.17, + "grad_norm": 5.797696514364497, + "learning_rate": 9.52200948691824e-06, + "loss": 0.5925, "step": 2341 }, { - "epoch": 0.25, - "grad_norm": 3.022192983168258, - "learning_rate": 8.821688496980964e-06, - "loss": 0.5687, + "epoch": 0.17, + "grad_norm": 1.8901053930844403, + "learning_rate": 9.52151903906136e-06, + "loss": 0.6032, "step": 2342 }, { - "epoch": 0.25, - "grad_norm": 2.321047814509788, - "learning_rate": 8.82058935468809e-06, - "loss": 0.7194, + "epoch": 0.17, + "grad_norm": 5.353562919374689, + "learning_rate": 9.521028352362522e-06, + "loss": 0.5623, "step": 2343 }, { - "epoch": 0.25, - "grad_norm": 2.4035818387743038, - "learning_rate": 8.819489768530192e-06, - "loss": 0.6671, + "epoch": 0.17, + "grad_norm": 1.7319738116739942, + "learning_rate": 9.520537426847648e-06, + "loss": 0.6051, "step": 2344 }, { - "epoch": 0.25, - "grad_norm": 3.2287384454438013, - "learning_rate": 8.818389738635012e-06, - "loss": 0.6707, + "epoch": 0.17, + "grad_norm": 2.077370649933147, + "learning_rate": 9.520046262542671e-06, + "loss": 0.6295, "step": 2345 }, { - "epoch": 0.25, - "grad_norm": 2.1051049030090847, - "learning_rate": 8.817289265130348e-06, - "loss": 0.7092, + "epoch": 0.17, + "grad_norm": 1.7247342881531706, + "learning_rate": 9.519554859473532e-06, + "loss": 0.5701, "step": 2346 }, { - "epoch": 0.25, - "grad_norm": 2.6891874499459885, - "learning_rate": 8.816188348144054e-06, - "loss": 0.7263, + "epoch": 0.17, + "grad_norm": 1.761322597026166, + "learning_rate": 9.51906321766619e-06, + "loss": 0.6101, "step": 2347 }, { - "epoch": 0.25, - "grad_norm": 1.8695048876833085, - "learning_rate": 8.815086987804029e-06, - "loss": 0.661, + "epoch": 0.17, + "grad_norm": 2.596272950258052, + "learning_rate": 9.518571337146621e-06, + "loss": 0.5443, "step": 2348 }, { - "epoch": 0.25, - "grad_norm": 2.307276114427358, - "learning_rate": 8.813985184238226e-06, - "loss": 0.7653, + "epoch": 0.17, + "grad_norm": 2.094033754795508, + "learning_rate": 9.518079217940799e-06, + "loss": 0.5615, "step": 2349 }, { - "epoch": 0.25, - "grad_norm": 2.273797735104588, - "learning_rate": 8.81288293757465e-06, - "loss": 0.6886, + "epoch": 0.17, + "grad_norm": 2.053521683117637, + "learning_rate": 9.517586860074724e-06, + "loss": 0.5793, "step": 2350 }, { - "epoch": 0.25, - "grad_norm": 2.8942145715992846, - "learning_rate": 8.811780247941354e-06, - "loss": 0.7078, + "epoch": 0.17, + "grad_norm": 1.826958612079418, + "learning_rate": 9.517094263574403e-06, + "loss": 0.6056, "step": 2351 }, { - "epoch": 0.25, - "grad_norm": 2.3063601691994826, - "learning_rate": 8.810677115466451e-06, - "loss": 0.677, + "epoch": 0.17, + "grad_norm": 1.6127263535831167, + "learning_rate": 9.516601428465857e-06, + "loss": 0.5495, "step": 2352 }, { - "epoch": 0.25, - "grad_norm": 2.1245376854410516, - "learning_rate": 8.809573540278094e-06, - "loss": 0.6966, + "epoch": 0.17, + "grad_norm": 2.1467744565821385, + "learning_rate": 9.51610835477512e-06, + "loss": 0.6421, "step": 2353 }, { - "epoch": 0.25, - "grad_norm": 2.8737504795386495, - "learning_rate": 8.808469522504495e-06, - "loss": 0.6428, + "epoch": 0.17, + "grad_norm": 0.8817592667778203, + "learning_rate": 9.515615042528239e-06, + "loss": 0.4935, "step": 2354 }, { - "epoch": 0.25, - "grad_norm": 2.578969771967044, - "learning_rate": 8.807365062273917e-06, - "loss": 0.7379, + "epoch": 0.17, + "grad_norm": 2.0688745499557646, + "learning_rate": 9.515121491751266e-06, + "loss": 0.5634, "step": 2355 }, { - "epoch": 0.25, - "grad_norm": 2.771616985464036, - "learning_rate": 8.806260159714672e-06, - "loss": 0.5943, + "epoch": 0.17, + "grad_norm": 1.5915495462706053, + "learning_rate": 9.51462770247028e-06, + "loss": 0.571, "step": 2356 }, { - "epoch": 0.25, - "grad_norm": 2.0923888169384046, - "learning_rate": 8.805154814955124e-06, - "loss": 0.6965, + "epoch": 0.17, + "grad_norm": 2.123564395079708, + "learning_rate": 9.51413367471136e-06, + "loss": 0.6312, "step": 2357 }, { - "epoch": 0.25, - "grad_norm": 2.4647946308004243, - "learning_rate": 8.80404902812369e-06, - "loss": 0.6628, + "epoch": 0.17, + "grad_norm": 3.484944396888248, + "learning_rate": 9.513639408500604e-06, + "loss": 0.5324, "step": 2358 }, { - "epoch": 0.25, - "grad_norm": 2.8740732083977183, - "learning_rate": 8.802942799348836e-06, - "loss": 0.7136, + "epoch": 0.17, + "grad_norm": 1.9158530769039848, + "learning_rate": 9.513144903864117e-06, + "loss": 0.5574, "step": 2359 }, { - "epoch": 0.25, - "grad_norm": 2.023161383444029, - "learning_rate": 8.80183612875908e-06, - "loss": 0.6532, + "epoch": 0.17, + "grad_norm": 1.574598276136798, + "learning_rate": 9.512650160828027e-06, + "loss": 0.5947, "step": 2360 }, { - "epoch": 0.25, - "grad_norm": 2.2951972055338783, - "learning_rate": 8.800729016482993e-06, - "loss": 0.7056, + "epoch": 0.17, + "grad_norm": 1.8222016352795363, + "learning_rate": 9.512155179418463e-06, + "loss": 0.6258, "step": 2361 }, { - "epoch": 0.25, - "grad_norm": 3.005412561447531, - "learning_rate": 8.799621462649198e-06, - "loss": 0.6511, + "epoch": 0.17, + "grad_norm": 1.958183147702792, + "learning_rate": 9.511659959661575e-06, + "loss": 0.6181, "step": 2362 }, { - "epoch": 0.25, - "grad_norm": 2.347273517470351, - "learning_rate": 8.798513467386361e-06, - "loss": 0.6972, + "epoch": 0.17, + "grad_norm": 1.9894690514747448, + "learning_rate": 9.511164501583519e-06, + "loss": 0.5062, "step": 2363 }, { - "epoch": 0.25, - "grad_norm": 1.2061420190350576, - "learning_rate": 8.797405030823212e-06, - "loss": 0.6577, + "epoch": 0.17, + "grad_norm": 1.5584798523936343, + "learning_rate": 9.510668805210468e-06, + "loss": 0.5024, "step": 2364 }, { - "epoch": 0.25, - "grad_norm": 2.209573303684799, - "learning_rate": 8.796296153088523e-06, - "loss": 0.6624, + "epoch": 0.17, + "grad_norm": 1.645465692877003, + "learning_rate": 9.510172870568606e-06, + "loss": 0.6233, "step": 2365 }, { - "epoch": 0.25, - "grad_norm": 1.0157084559249987, - "learning_rate": 8.79518683431112e-06, - "loss": 0.6218, + "epoch": 0.17, + "grad_norm": 1.4441956456267315, + "learning_rate": 9.509676697684131e-06, + "loss": 0.5358, "step": 2366 }, { - "epoch": 0.25, - "grad_norm": 2.6605469423214507, - "learning_rate": 8.794077074619884e-06, - "loss": 0.6776, + "epoch": 0.17, + "grad_norm": 1.816359462705794, + "learning_rate": 9.509180286583253e-06, + "loss": 0.5712, "step": 2367 }, { - "epoch": 0.25, - "grad_norm": 2.329846735401453, - "learning_rate": 8.79296687414374e-06, - "loss": 0.6263, + "epoch": 0.17, + "grad_norm": 1.5658739742368475, + "learning_rate": 9.508683637292192e-06, + "loss": 0.6337, "step": 2368 }, { - "epoch": 0.25, - "grad_norm": 2.16211910196471, - "learning_rate": 8.791856233011668e-06, - "loss": 0.7662, + "epoch": 0.17, + "grad_norm": 2.561240221894851, + "learning_rate": 9.508186749837182e-06, + "loss": 0.595, "step": 2369 }, { - "epoch": 0.25, - "grad_norm": 2.5132087358973325, - "learning_rate": 8.7907451513527e-06, - "loss": 0.5662, + "epoch": 0.17, + "grad_norm": 1.6652886922147188, + "learning_rate": 9.507689624244477e-06, + "loss": 0.5634, "step": 2370 }, { - "epoch": 0.25, - "grad_norm": 2.4691914644939135, - "learning_rate": 8.78963362929592e-06, - "loss": 0.6809, + "epoch": 0.17, + "grad_norm": 1.6058241057415983, + "learning_rate": 9.507192260540327e-06, + "loss": 0.5319, "step": 2371 }, { - "epoch": 0.25, - "grad_norm": 2.517049664768459, - "learning_rate": 8.788521666970458e-06, - "loss": 0.6717, + "epoch": 0.17, + "grad_norm": 1.6128326781548763, + "learning_rate": 9.506694658751011e-06, + "loss": 0.6192, "step": 2372 }, { - "epoch": 0.25, - "grad_norm": 2.083726661084472, - "learning_rate": 8.7874092645055e-06, - "loss": 0.6727, + "epoch": 0.17, + "grad_norm": 1.5273121975441475, + "learning_rate": 9.506196818902813e-06, + "loss": 0.5349, "step": 2373 }, { - "epoch": 0.25, - "grad_norm": 3.259534102965764, - "learning_rate": 8.786296422030283e-06, - "loss": 0.7514, + "epoch": 0.17, + "grad_norm": 1.7988139656423823, + "learning_rate": 9.50569874102203e-06, + "loss": 0.653, "step": 2374 }, { - "epoch": 0.25, - "grad_norm": 5.0201738332244945, - "learning_rate": 8.785183139674093e-06, - "loss": 0.7253, + "epoch": 0.17, + "grad_norm": 1.577871834516777, + "learning_rate": 9.50520042513497e-06, + "loss": 0.6027, "step": 2375 }, { - "epoch": 0.25, - "grad_norm": 2.7540666834409913, - "learning_rate": 8.784069417566268e-06, - "loss": 0.6923, + "epoch": 0.17, + "grad_norm": 1.7341253318314607, + "learning_rate": 9.504701871267961e-06, + "loss": 0.5152, "step": 2376 }, { - "epoch": 0.25, - "grad_norm": 2.4727794350012213, - "learning_rate": 8.782955255836194e-06, - "loss": 0.6761, + "epoch": 0.17, + "grad_norm": 1.6647582274055037, + "learning_rate": 9.504203079447333e-06, + "loss": 0.5751, "step": 2377 }, { - "epoch": 0.25, - "grad_norm": 2.642090445706794, - "learning_rate": 8.781840654613317e-06, - "loss": 0.6928, + "epoch": 0.17, + "grad_norm": 1.6271514375630778, + "learning_rate": 9.503704049699436e-06, + "loss": 0.5853, "step": 2378 }, { - "epoch": 0.25, - "grad_norm": 2.3378650663235514, - "learning_rate": 8.780725614027123e-06, - "loss": 0.6407, + "epoch": 0.17, + "grad_norm": 1.6956553706206092, + "learning_rate": 9.503204782050631e-06, + "loss": 0.6051, "step": 2379 }, { - "epoch": 0.25, - "grad_norm": 5.5463507569750945, - "learning_rate": 8.779610134207157e-06, - "loss": 0.731, + "epoch": 0.17, + "grad_norm": 1.8799693009905634, + "learning_rate": 9.50270527652729e-06, + "loss": 0.5817, "step": 2380 }, { - "epoch": 0.25, - "grad_norm": 2.476687518226198, - "learning_rate": 8.778494215283011e-06, - "loss": 0.6693, + "epoch": 0.17, + "grad_norm": 1.8132704809033402, + "learning_rate": 9.5022055331558e-06, + "loss": 0.6311, "step": 2381 }, { - "epoch": 0.25, - "grad_norm": 2.6190587133475143, - "learning_rate": 8.777377857384329e-06, - "loss": 0.7043, + "epoch": 0.17, + "grad_norm": 1.995185123556651, + "learning_rate": 9.501705551962558e-06, + "loss": 0.6007, "step": 2382 }, { - "epoch": 0.25, - "grad_norm": 2.276304071392182, - "learning_rate": 8.776261060640807e-06, - "loss": 0.6189, + "epoch": 0.17, + "grad_norm": 1.6644974190828545, + "learning_rate": 9.501205332973974e-06, + "loss": 0.5674, "step": 2383 }, { - "epoch": 0.25, - "grad_norm": 2.5067148512484305, - "learning_rate": 8.775143825182192e-06, - "loss": 0.6429, + "epoch": 0.17, + "grad_norm": 1.7570429565731864, + "learning_rate": 9.500704876216473e-06, + "loss": 0.5248, "step": 2384 }, { - "epoch": 0.25, - "grad_norm": 2.533233559475545, - "learning_rate": 8.77402615113828e-06, - "loss": 0.7218, + "epoch": 0.17, + "grad_norm": 0.8966532297955477, + "learning_rate": 9.50020418171649e-06, + "loss": 0.479, "step": 2385 }, { - "epoch": 0.25, - "grad_norm": 2.3752543251935956, - "learning_rate": 8.77290803863892e-06, - "loss": 0.6615, + "epoch": 0.17, + "grad_norm": 1.6810838603127205, + "learning_rate": 9.499703249500473e-06, + "loss": 0.6016, "step": 2386 }, { - "epoch": 0.25, - "grad_norm": 2.6509057573466417, - "learning_rate": 8.771789487814009e-06, - "loss": 0.6687, + "epoch": 0.17, + "grad_norm": 3.4233757754728775, + "learning_rate": 9.499202079594884e-06, + "loss": 0.5874, "step": 2387 }, { - "epoch": 0.25, - "grad_norm": 2.322946052629726, - "learning_rate": 8.770670498793498e-06, - "loss": 0.71, + "epoch": 0.17, + "grad_norm": 2.49478299949271, + "learning_rate": 9.498700672026195e-06, + "loss": 0.6048, "step": 2388 }, { - "epoch": 0.25, - "grad_norm": 2.555317772836953, - "learning_rate": 8.76955107170739e-06, - "loss": 0.6629, + "epoch": 0.17, + "grad_norm": 1.6611059123839058, + "learning_rate": 9.498199026820894e-06, + "loss": 0.5815, "step": 2389 }, { - "epoch": 0.25, - "grad_norm": 2.0462771901928227, - "learning_rate": 8.768431206685735e-06, - "loss": 0.6768, + "epoch": 0.17, + "grad_norm": 2.941658504425458, + "learning_rate": 9.497697144005476e-06, + "loss": 0.5542, "step": 2390 }, { - "epoch": 0.25, - "grad_norm": 2.4959674203980957, - "learning_rate": 8.767310903858635e-06, - "loss": 0.7051, + "epoch": 0.17, + "grad_norm": 1.93474719920731, + "learning_rate": 9.497195023606457e-06, + "loss": 0.4913, "step": 2391 }, { - "epoch": 0.25, - "grad_norm": 2.400493851416112, - "learning_rate": 8.766190163356243e-06, - "loss": 0.7141, + "epoch": 0.17, + "grad_norm": 1.711677872143683, + "learning_rate": 9.496692665650355e-06, + "loss": 0.5308, "step": 2392 }, { - "epoch": 0.25, - "grad_norm": 2.447160327467119, - "learning_rate": 8.765068985308768e-06, - "loss": 0.6782, + "epoch": 0.17, + "grad_norm": 3.0114360309634494, + "learning_rate": 9.496190070163713e-06, + "loss": 0.5771, "step": 2393 }, { - "epoch": 0.25, - "grad_norm": 2.205789371123814, - "learning_rate": 8.76394736984646e-06, - "loss": 0.6606, + "epoch": 0.17, + "grad_norm": 1.6358048064465478, + "learning_rate": 9.495687237173075e-06, + "loss": 0.5726, "step": 2394 }, { - "epoch": 0.25, - "grad_norm": 3.2524271530733375, - "learning_rate": 8.762825317099628e-06, - "loss": 0.623, + "epoch": 0.17, + "grad_norm": 1.690474124379073, + "learning_rate": 9.495184166705003e-06, + "loss": 0.5449, "step": 2395 }, { - "epoch": 0.25, - "grad_norm": 2.1224476313578493, - "learning_rate": 8.761702827198626e-06, - "loss": 0.6808, + "epoch": 0.17, + "grad_norm": 1.8316524899539741, + "learning_rate": 9.494680858786074e-06, + "loss": 0.5954, "step": 2396 }, { - "epoch": 0.25, - "grad_norm": 1.2653362778423383, - "learning_rate": 8.760579900273865e-06, - "loss": 0.636, + "epoch": 0.17, + "grad_norm": 1.946188987077838, + "learning_rate": 9.49417731344287e-06, + "loss": 0.5979, "step": 2397 }, { - "epoch": 0.25, - "grad_norm": 3.2781604321793023, - "learning_rate": 8.759456536455802e-06, - "loss": 0.6624, + "epoch": 0.17, + "grad_norm": 2.006503034739207, + "learning_rate": 9.493673530701993e-06, + "loss": 0.576, "step": 2398 }, { - "epoch": 0.25, - "grad_norm": 2.4477894582450905, - "learning_rate": 8.758332735874946e-06, - "loss": 0.6002, + "epoch": 0.17, + "grad_norm": 1.6126666836327572, + "learning_rate": 9.493169510590052e-06, + "loss": 0.5213, "step": 2399 }, { - "epoch": 0.25, - "grad_norm": 3.197585400074303, - "learning_rate": 8.757208498661857e-06, - "loss": 0.6718, + "epoch": 0.17, + "grad_norm": 1.7731688046801604, + "learning_rate": 9.492665253133673e-06, + "loss": 0.595, "step": 2400 }, { - "epoch": 0.25, - "grad_norm": 2.278739347444417, - "learning_rate": 8.756083824947145e-06, - "loss": 0.7983, + "epoch": 0.17, + "grad_norm": 1.5357496729022324, + "learning_rate": 9.492160758359491e-06, + "loss": 0.5293, "step": 2401 }, { - "epoch": 0.25, - "grad_norm": 2.585442200635953, - "learning_rate": 8.754958714861474e-06, - "loss": 0.7644, + "epoch": 0.17, + "grad_norm": 1.7717826287482, + "learning_rate": 9.491656026294158e-06, + "loss": 0.6017, "step": 2402 }, { - "epoch": 0.25, - "grad_norm": 3.0314353743944635, - "learning_rate": 8.753833168535551e-06, - "loss": 0.7351, + "epoch": 0.17, + "grad_norm": 2.09912066141132, + "learning_rate": 9.491151056964334e-06, + "loss": 0.586, "step": 2403 }, { - "epoch": 0.25, - "grad_norm": 2.7580999310075356, - "learning_rate": 8.752707186100144e-06, - "loss": 0.6929, + "epoch": 0.17, + "grad_norm": 1.7468342686331428, + "learning_rate": 9.490645850396693e-06, + "loss": 0.5416, "step": 2404 }, { - "epoch": 0.25, - "grad_norm": 2.720900436087719, - "learning_rate": 8.751580767686063e-06, - "loss": 0.6816, + "epoch": 0.17, + "grad_norm": 1.8926179760589417, + "learning_rate": 9.490140406617921e-06, + "loss": 0.6009, "step": 2405 }, { - "epoch": 0.25, - "grad_norm": 2.7870764847387455, - "learning_rate": 8.750453913424172e-06, - "loss": 0.6466, + "epoch": 0.17, + "grad_norm": 2.1847171493603446, + "learning_rate": 9.489634725654718e-06, + "loss": 0.5845, "step": 2406 }, { - "epoch": 0.25, - "grad_norm": 2.5240294889336234, - "learning_rate": 8.74932662344539e-06, - "loss": 0.6982, + "epoch": 0.17, + "grad_norm": 1.4937615229525123, + "learning_rate": 9.489128807533795e-06, + "loss": 0.5908, "step": 2407 }, { - "epoch": 0.25, - "grad_norm": 12.532901910391917, - "learning_rate": 8.748198897880677e-06, - "loss": 0.68, + "epoch": 0.17, + "grad_norm": 1.6160484915305766, + "learning_rate": 9.48862265228188e-06, + "loss": 0.5647, "step": 2408 }, { - "epoch": 0.25, - "grad_norm": 2.2585279019476845, - "learning_rate": 8.747070736861052e-06, - "loss": 0.6859, + "epoch": 0.17, + "grad_norm": 1.884345336285536, + "learning_rate": 9.488116259925706e-06, + "loss": 0.5971, "step": 2409 }, { - "epoch": 0.25, - "grad_norm": 2.950693720936932, - "learning_rate": 8.745942140517579e-06, - "loss": 0.6846, + "epoch": 0.17, + "grad_norm": 2.480142410196229, + "learning_rate": 9.487609630492022e-06, + "loss": 0.5631, "step": 2410 }, { - "epoch": 0.25, - "grad_norm": 2.8818540494898084, - "learning_rate": 8.744813108981377e-06, - "loss": 0.7454, + "epoch": 0.17, + "grad_norm": 1.8078728046006136, + "learning_rate": 9.487102764007592e-06, + "loss": 0.6323, "step": 2411 }, { - "epoch": 0.25, - "grad_norm": 2.03077990529205, - "learning_rate": 8.743683642383613e-06, - "loss": 0.7515, + "epoch": 0.17, + "grad_norm": 2.0944809206074964, + "learning_rate": 9.486595660499189e-06, + "loss": 0.6238, "step": 2412 }, { - "epoch": 0.25, - "grad_norm": 2.3199732724761954, - "learning_rate": 8.742553740855507e-06, - "loss": 0.6335, + "epoch": 0.17, + "grad_norm": 2.394517765849018, + "learning_rate": 9.4860883199936e-06, + "loss": 0.5433, "step": 2413 }, { - "epoch": 0.25, - "grad_norm": 2.5030676274429786, - "learning_rate": 8.741423404528325e-06, - "loss": 0.6373, + "epoch": 0.17, + "grad_norm": 1.8290720132050349, + "learning_rate": 9.485580742517628e-06, + "loss": 0.5781, "step": 2414 }, { - "epoch": 0.25, - "grad_norm": 3.576788831727748, - "learning_rate": 8.740292633533387e-06, - "loss": 0.6511, + "epoch": 0.17, + "grad_norm": 1.9064009309066845, + "learning_rate": 9.48507292809808e-06, + "loss": 0.6145, "step": 2415 }, { - "epoch": 0.25, - "grad_norm": 2.292929750954021, - "learning_rate": 8.739161428002061e-06, - "loss": 0.6665, + "epoch": 0.17, + "grad_norm": 1.8551318214380745, + "learning_rate": 9.484564876761781e-06, + "loss": 0.5448, "step": 2416 }, { - "epoch": 0.25, - "grad_norm": 2.495581313970782, - "learning_rate": 8.738029788065772e-06, - "loss": 0.6986, + "epoch": 0.17, + "grad_norm": 2.8462952433382767, + "learning_rate": 9.484056588535572e-06, + "loss": 0.5399, "step": 2417 }, { - "epoch": 0.25, - "grad_norm": 2.7267216304430106, - "learning_rate": 8.736897713855988e-06, - "loss": 0.6805, + "epoch": 0.17, + "grad_norm": 2.0724372539696714, + "learning_rate": 9.483548063446298e-06, + "loss": 0.5796, "step": 2418 }, { - "epoch": 0.25, - "grad_norm": 2.5310007726717654, - "learning_rate": 8.735765205504228e-06, - "loss": 0.6876, + "epoch": 0.17, + "grad_norm": 1.6784176301687213, + "learning_rate": 9.483039301520824e-06, + "loss": 0.5078, "step": 2419 }, { - "epoch": 0.25, - "grad_norm": 1.2473961744466824, - "learning_rate": 8.734632263142066e-06, - "loss": 0.626, + "epoch": 0.17, + "grad_norm": 1.64963957147287, + "learning_rate": 9.482530302786023e-06, + "loss": 0.6285, "step": 2420 }, { - "epoch": 0.25, - "grad_norm": 2.6025083204104194, - "learning_rate": 8.733498886901123e-06, - "loss": 0.6504, + "epoch": 0.17, + "grad_norm": 2.3877760842652354, + "learning_rate": 9.482021067268782e-06, + "loss": 0.5282, "step": 2421 }, { - "epoch": 0.25, - "grad_norm": 2.885692680206437, - "learning_rate": 8.732365076913072e-06, - "loss": 0.6555, + "epoch": 0.17, + "grad_norm": 1.8976330999110986, + "learning_rate": 9.481511594996002e-06, + "loss": 0.6747, "step": 2422 }, { - "epoch": 0.25, - "grad_norm": 4.032356607171435, - "learning_rate": 8.731230833309637e-06, - "loss": 0.6401, + "epoch": 0.17, + "grad_norm": 2.0969444600793508, + "learning_rate": 9.481001885994595e-06, + "loss": 0.5325, "step": 2423 }, { - "epoch": 0.26, - "grad_norm": 2.1972012220479074, - "learning_rate": 8.730096156222586e-06, - "loss": 0.6757, + "epoch": 0.17, + "grad_norm": 0.9100247921519142, + "learning_rate": 9.480491940291484e-06, + "loss": 0.4672, "step": 2424 }, { - "epoch": 0.26, - "grad_norm": 2.291089360356186, - "learning_rate": 8.728961045783751e-06, - "loss": 0.7047, + "epoch": 0.17, + "grad_norm": 0.8936112495772646, + "learning_rate": 9.479981757913606e-06, + "loss": 0.4736, "step": 2425 }, { - "epoch": 0.26, - "grad_norm": 2.2487186243095816, - "learning_rate": 8.727825502124998e-06, - "loss": 0.7178, + "epoch": 0.17, + "grad_norm": 1.5875963366773258, + "learning_rate": 9.479471338887911e-06, + "loss": 0.5744, "step": 2426 }, { - "epoch": 0.26, - "grad_norm": 2.417453807033552, - "learning_rate": 8.726689525378254e-06, - "loss": 0.6669, + "epoch": 0.17, + "grad_norm": 1.7139757889933747, + "learning_rate": 9.478960683241362e-06, + "loss": 0.5372, "step": 2427 }, { - "epoch": 0.26, - "grad_norm": 2.3540762968676656, - "learning_rate": 8.725553115675496e-06, - "loss": 0.5727, + "epoch": 0.17, + "grad_norm": 1.535768903023024, + "learning_rate": 9.478449791000933e-06, + "loss": 0.5557, "step": 2428 }, { - "epoch": 0.26, - "grad_norm": 4.006417010869224, - "learning_rate": 8.724416273148745e-06, - "loss": 0.6692, + "epoch": 0.17, + "grad_norm": 1.7758958675592578, + "learning_rate": 9.47793866219361e-06, + "loss": 0.6053, "step": 2429 }, { - "epoch": 0.26, - "grad_norm": 2.24813269883343, - "learning_rate": 8.723278997930078e-06, - "loss": 0.6947, + "epoch": 0.17, + "grad_norm": 1.0616471894396087, + "learning_rate": 9.477427296846395e-06, + "loss": 0.4877, "step": 2430 }, { - "epoch": 0.26, - "grad_norm": 2.568072157207673, - "learning_rate": 8.722141290151618e-06, - "loss": 0.6721, + "epoch": 0.17, + "grad_norm": 1.7971349608801896, + "learning_rate": 9.4769156949863e-06, + "loss": 0.532, "step": 2431 }, { - "epoch": 0.26, - "grad_norm": 2.681084669318892, - "learning_rate": 8.721003149945545e-06, - "loss": 0.7217, + "epoch": 0.17, + "grad_norm": 1.7505645109539452, + "learning_rate": 9.476403856640345e-06, + "loss": 0.576, "step": 2432 }, { - "epoch": 0.26, - "grad_norm": 2.5296915450142516, - "learning_rate": 8.719864577444082e-06, - "loss": 0.6596, + "epoch": 0.17, + "grad_norm": 1.6537996105470203, + "learning_rate": 9.475891781835572e-06, + "loss": 0.5429, "step": 2433 }, { - "epoch": 0.26, - "grad_norm": 2.750107096539297, - "learning_rate": 8.718725572779505e-06, - "loss": 0.835, + "epoch": 0.17, + "grad_norm": 1.8266291240575487, + "learning_rate": 9.475379470599027e-06, + "loss": 0.615, "step": 2434 }, { - "epoch": 0.26, - "grad_norm": 2.444233296643607, - "learning_rate": 8.71758613608414e-06, - "loss": 0.7186, + "epoch": 0.17, + "grad_norm": 1.4411971714044132, + "learning_rate": 9.474866922957776e-06, + "loss": 0.51, "step": 2435 }, { - "epoch": 0.26, - "grad_norm": 2.270574624183063, - "learning_rate": 8.716446267490365e-06, - "loss": 0.7578, + "epoch": 0.17, + "grad_norm": 1.4597483685971446, + "learning_rate": 9.474354138938888e-06, + "loss": 0.538, "step": 2436 }, { - "epoch": 0.26, - "grad_norm": 3.8389444303037235, - "learning_rate": 8.715305967130604e-06, - "loss": 0.734, + "epoch": 0.17, + "grad_norm": 2.747895246167512, + "learning_rate": 9.473841118569455e-06, + "loss": 0.6096, "step": 2437 }, { - "epoch": 0.26, - "grad_norm": 1.1979511502659714, - "learning_rate": 8.71416523513734e-06, - "loss": 0.6708, + "epoch": 0.17, + "grad_norm": 1.8618997287988055, + "learning_rate": 9.473327861876576e-06, + "loss": 0.6746, "step": 2438 }, { - "epoch": 0.26, - "grad_norm": 2.8417444339853852, - "learning_rate": 8.713024071643092e-06, - "loss": 0.6287, + "epoch": 0.17, + "grad_norm": 1.6315229863840135, + "learning_rate": 9.47281436888736e-06, + "loss": 0.6554, "step": 2439 }, { - "epoch": 0.26, - "grad_norm": 3.0558749007403, - "learning_rate": 8.71188247678044e-06, - "loss": 0.7621, + "epoch": 0.17, + "grad_norm": 0.8323197766584988, + "learning_rate": 9.472300639628933e-06, + "loss": 0.4757, "step": 2440 }, { - "epoch": 0.26, - "grad_norm": 2.291175152328059, - "learning_rate": 8.710740450682013e-06, - "loss": 0.6667, + "epoch": 0.17, + "grad_norm": 1.5736540854525471, + "learning_rate": 9.471786674128433e-06, + "loss": 0.5553, "step": 2441 }, { - "epoch": 0.26, - "grad_norm": 2.141308772601036, - "learning_rate": 8.709597993480489e-06, - "loss": 0.6674, + "epoch": 0.17, + "grad_norm": 1.6372213193529723, + "learning_rate": 9.47127247241301e-06, + "loss": 0.5423, "step": 2442 }, { - "epoch": 0.26, - "grad_norm": 2.0735451596302137, - "learning_rate": 8.708455105308591e-06, - "loss": 0.6599, + "epoch": 0.17, + "grad_norm": 1.7325522728974998, + "learning_rate": 9.47075803450982e-06, + "loss": 0.5966, "step": 2443 }, { - "epoch": 0.26, - "grad_norm": 2.481850558377454, - "learning_rate": 8.7073117862991e-06, - "loss": 0.783, + "epoch": 0.17, + "grad_norm": 1.8944352469376458, + "learning_rate": 9.470243360446043e-06, + "loss": 0.5749, "step": 2444 }, { - "epoch": 0.26, - "grad_norm": 1.9268542381826397, - "learning_rate": 8.706168036584843e-06, - "loss": 0.6356, + "epoch": 0.17, + "grad_norm": 1.6268722205896935, + "learning_rate": 9.469728450248866e-06, + "loss": 0.5744, "step": 2445 }, { - "epoch": 0.26, - "grad_norm": 4.349321958616555, - "learning_rate": 8.705023856298695e-06, - "loss": 0.611, + "epoch": 0.17, + "grad_norm": 1.877926411376837, + "learning_rate": 9.469213303945486e-06, + "loss": 0.5769, "step": 2446 }, { - "epoch": 0.26, - "grad_norm": 3.1941989903979624, - "learning_rate": 8.703879245573588e-06, - "loss": 0.6716, + "epoch": 0.17, + "grad_norm": 1.8027373940455296, + "learning_rate": 9.468697921563115e-06, + "loss": 0.6028, "step": 2447 }, { - "epoch": 0.26, - "grad_norm": 2.1349012651457664, - "learning_rate": 8.702734204542494e-06, - "loss": 0.7377, + "epoch": 0.17, + "grad_norm": 1.629181965037637, + "learning_rate": 9.46818230312898e-06, + "loss": 0.5655, "step": 2448 }, { - "epoch": 0.26, - "grad_norm": 2.648744197677024, - "learning_rate": 8.701588733338446e-06, - "loss": 0.7413, + "epoch": 0.17, + "grad_norm": 1.6439876084894995, + "learning_rate": 9.467666448670312e-06, + "loss": 0.5465, "step": 2449 }, { - "epoch": 0.26, - "grad_norm": 3.3362813946257623, - "learning_rate": 8.700442832094517e-06, - "loss": 0.5947, + "epoch": 0.17, + "grad_norm": 1.8740474432866674, + "learning_rate": 9.467150358214367e-06, + "loss": 0.551, "step": 2450 }, { - "epoch": 0.26, - "grad_norm": 2.0804046111905614, - "learning_rate": 8.699296500943839e-06, - "loss": 0.6786, + "epoch": 0.17, + "grad_norm": 1.6958393248914008, + "learning_rate": 9.466634031788401e-06, + "loss": 0.6347, "step": 2451 }, { - "epoch": 0.26, - "grad_norm": 2.661217136166758, - "learning_rate": 8.698149740019587e-06, - "loss": 0.7303, + "epoch": 0.17, + "grad_norm": 2.0972736452581824, + "learning_rate": 9.466117469419692e-06, + "loss": 0.671, "step": 2452 }, { - "epoch": 0.26, - "grad_norm": 2.5190865956506547, - "learning_rate": 8.697002549454988e-06, - "loss": 0.7269, + "epoch": 0.17, + "grad_norm": 1.6527815487367117, + "learning_rate": 9.465600671135524e-06, + "loss": 0.5834, "step": 2453 }, { - "epoch": 0.26, - "grad_norm": 3.070053475596138, - "learning_rate": 8.695854929383318e-06, - "loss": 0.7228, + "epoch": 0.17, + "grad_norm": 3.293689600144643, + "learning_rate": 9.465083636963196e-06, + "loss": 0.618, "step": 2454 }, { - "epoch": 0.26, - "grad_norm": 2.441251249535768, - "learning_rate": 8.694706879937909e-06, - "loss": 0.6955, + "epoch": 0.17, + "grad_norm": 1.9213563836986014, + "learning_rate": 9.464566366930022e-06, + "loss": 0.6025, "step": 2455 }, { - "epoch": 0.26, - "grad_norm": 2.2612646874942746, - "learning_rate": 8.693558401252132e-06, - "loss": 0.656, + "epoch": 0.17, + "grad_norm": 3.266886281064554, + "learning_rate": 9.464048861063324e-06, + "loss": 0.5967, "step": 2456 }, { - "epoch": 0.26, - "grad_norm": 2.4863160385294063, - "learning_rate": 8.69240949345942e-06, - "loss": 0.7101, + "epoch": 0.17, + "grad_norm": 1.5537547728101417, + "learning_rate": 9.463531119390439e-06, + "loss": 0.5616, "step": 2457 }, { - "epoch": 0.26, - "grad_norm": 2.43116626808894, - "learning_rate": 8.691260156693245e-06, - "loss": 0.6744, + "epoch": 0.17, + "grad_norm": 1.6559123481610567, + "learning_rate": 9.463013141938717e-06, + "loss": 0.601, "step": 2458 }, { - "epoch": 0.26, - "grad_norm": 2.5200768749768963, - "learning_rate": 8.690110391087134e-06, - "loss": 0.6949, + "epoch": 0.17, + "grad_norm": 1.4638674481982996, + "learning_rate": 9.462494928735516e-06, + "loss": 0.6046, "step": 2459 }, { - "epoch": 0.26, - "grad_norm": 2.525389539161813, - "learning_rate": 8.688960196774668e-06, - "loss": 0.6323, + "epoch": 0.17, + "grad_norm": 1.8826667715435383, + "learning_rate": 9.461976479808213e-06, + "loss": 0.6134, "step": 2460 }, { - "epoch": 0.26, - "grad_norm": 2.102109783358811, - "learning_rate": 8.687809573889467e-06, - "loss": 0.6833, + "epoch": 0.17, + "grad_norm": 1.6524933031858768, + "learning_rate": 9.461457795184192e-06, + "loss": 0.5403, "step": 2461 }, { - "epoch": 0.26, - "grad_norm": 2.222074030086465, - "learning_rate": 8.686658522565211e-06, - "loss": 0.7645, + "epoch": 0.17, + "grad_norm": 1.9242258742765075, + "learning_rate": 9.460938874890855e-06, + "loss": 0.5526, "step": 2462 }, { - "epoch": 0.26, - "grad_norm": 2.6743137482807753, - "learning_rate": 8.685507042935627e-06, - "loss": 0.7335, + "epoch": 0.17, + "grad_norm": 1.567211558753031, + "learning_rate": 9.46041971895561e-06, + "loss": 0.585, "step": 2463 }, { - "epoch": 0.26, - "grad_norm": 2.8113533406525595, - "learning_rate": 8.684355135134486e-06, - "loss": 0.6922, + "epoch": 0.17, + "grad_norm": 1.7738305324116619, + "learning_rate": 9.45990032740588e-06, + "loss": 0.5513, "step": 2464 }, { - "epoch": 0.26, - "grad_norm": 1.9458115809108403, - "learning_rate": 8.683202799295616e-06, - "loss": 0.6556, + "epoch": 0.17, + "grad_norm": 1.0318973246245005, + "learning_rate": 9.459380700269104e-06, + "loss": 0.4787, "step": 2465 }, { - "epoch": 0.26, - "grad_norm": 2.1842127186245333, - "learning_rate": 8.682050035552891e-06, - "loss": 0.6102, + "epoch": 0.17, + "grad_norm": 2.208283710445658, + "learning_rate": 9.458860837572727e-06, + "loss": 0.6566, "step": 2466 }, { - "epoch": 0.26, - "grad_norm": 2.1705972141925733, - "learning_rate": 8.680896844040238e-06, - "loss": 0.706, + "epoch": 0.18, + "grad_norm": 1.7286127321962133, + "learning_rate": 9.458340739344214e-06, + "loss": 0.607, "step": 2467 }, { - "epoch": 0.26, - "grad_norm": 3.2360348544184308, - "learning_rate": 8.67974322489163e-06, - "loss": 0.7269, + "epoch": 0.18, + "grad_norm": 2.1838260016836855, + "learning_rate": 9.457820405611035e-06, + "loss": 0.5885, "step": 2468 }, { - "epoch": 0.26, - "grad_norm": 2.569034689616941, - "learning_rate": 8.678589178241092e-06, - "loss": 0.737, + "epoch": 0.18, + "grad_norm": 0.8413501156024874, + "learning_rate": 9.45729983640068e-06, + "loss": 0.4609, "step": 2469 }, { - "epoch": 0.26, - "grad_norm": 2.729945172388795, - "learning_rate": 8.677434704222697e-06, - "loss": 0.7642, + "epoch": 0.18, + "grad_norm": 1.7089212154497195, + "learning_rate": 9.456779031740642e-06, + "loss": 0.5292, "step": 2470 }, { - "epoch": 0.26, - "grad_norm": 2.577038874216306, - "learning_rate": 8.676279802970566e-06, - "loss": 0.6741, + "epoch": 0.18, + "grad_norm": 0.8931667656166066, + "learning_rate": 9.456257991658433e-06, + "loss": 0.4595, "step": 2471 }, { - "epoch": 0.26, - "grad_norm": 3.477474813381419, - "learning_rate": 8.675124474618876e-06, - "loss": 0.6524, + "epoch": 0.18, + "grad_norm": 1.6195460412201779, + "learning_rate": 9.455736716181576e-06, + "loss": 0.5718, "step": 2472 }, { - "epoch": 0.26, - "grad_norm": 2.281055787854727, - "learning_rate": 8.673968719301849e-06, - "loss": 0.6976, + "epoch": 0.18, + "grad_norm": 1.968128516667588, + "learning_rate": 9.455215205337612e-06, + "loss": 0.5466, "step": 2473 }, { - "epoch": 0.26, - "grad_norm": 2.506648427439492, - "learning_rate": 8.672812537153757e-06, - "loss": 0.6998, + "epoch": 0.18, + "grad_norm": 1.830871541462981, + "learning_rate": 9.45469345915408e-06, + "loss": 0.5387, "step": 2474 }, { - "epoch": 0.26, - "grad_norm": 4.568840294148294, - "learning_rate": 8.67165592830892e-06, - "loss": 0.692, + "epoch": 0.18, + "grad_norm": 1.7696041860215133, + "learning_rate": 9.454171477658548e-06, + "loss": 0.5541, "step": 2475 }, { - "epoch": 0.26, - "grad_norm": 2.257378434912131, - "learning_rate": 8.670498892901712e-06, - "loss": 0.7227, + "epoch": 0.18, + "grad_norm": 5.293294137752951, + "learning_rate": 9.453649260878583e-06, + "loss": 0.5697, "step": 2476 }, { - "epoch": 0.26, - "grad_norm": 1.9597763138644833, - "learning_rate": 8.669341431066552e-06, - "loss": 0.6658, + "epoch": 0.18, + "grad_norm": 1.8133653756042332, + "learning_rate": 9.453126808841775e-06, + "loss": 0.6262, "step": 2477 }, { - "epoch": 0.26, - "grad_norm": 2.4823038278036846, - "learning_rate": 8.668183542937912e-06, - "loss": 0.6355, + "epoch": 0.18, + "grad_norm": 2.1386056896703494, + "learning_rate": 9.45260412157572e-06, + "loss": 0.5532, "step": 2478 }, { - "epoch": 0.26, - "grad_norm": 2.2040856463646663, - "learning_rate": 8.66702522865031e-06, - "loss": 0.6359, + "epoch": 0.18, + "grad_norm": 1.4537159147583336, + "learning_rate": 9.452081199108027e-06, + "loss": 0.6062, "step": 2479 }, { - "epoch": 0.26, - "grad_norm": 2.7952172523154166, - "learning_rate": 8.66586648833832e-06, - "loss": 0.684, + "epoch": 0.18, + "grad_norm": 1.7268718846725477, + "learning_rate": 9.45155804146632e-06, + "loss": 0.5779, "step": 2480 }, { - "epoch": 0.26, - "grad_norm": 2.400485734318114, - "learning_rate": 8.664707322136556e-06, - "loss": 0.6433, + "epoch": 0.18, + "grad_norm": 2.0932544780773954, + "learning_rate": 9.451034648678232e-06, + "loss": 0.5101, "step": 2481 }, { - "epoch": 0.26, - "grad_norm": 2.306795903415676, - "learning_rate": 8.663547730179692e-06, - "loss": 0.651, + "epoch": 0.18, + "grad_norm": 1.658577362774394, + "learning_rate": 9.450511020771413e-06, + "loss": 0.532, "step": 2482 }, { - "epoch": 0.26, - "grad_norm": 4.985126814934233, - "learning_rate": 8.662387712602438e-06, - "loss": 0.6954, + "epoch": 0.18, + "grad_norm": 2.0597679198817476, + "learning_rate": 9.44998715777352e-06, + "loss": 0.6297, "step": 2483 }, { - "epoch": 0.26, - "grad_norm": 2.2505071915852075, - "learning_rate": 8.661227269539572e-06, - "loss": 0.6472, + "epoch": 0.18, + "grad_norm": 1.7359347422340632, + "learning_rate": 9.449463059712228e-06, + "loss": 0.5324, "step": 2484 }, { - "epoch": 0.26, - "grad_norm": 1.8826833418351787, - "learning_rate": 8.660066401125902e-06, - "loss": 0.6311, + "epoch": 0.18, + "grad_norm": 1.5653600172103501, + "learning_rate": 9.44893872661522e-06, + "loss": 0.5413, "step": 2485 }, { - "epoch": 0.26, - "grad_norm": 6.818214783238789, - "learning_rate": 8.658905107496299e-06, - "loss": 0.6617, + "epoch": 0.18, + "grad_norm": 1.9908448220712383, + "learning_rate": 9.448414158510194e-06, + "loss": 0.631, "step": 2486 }, { - "epoch": 0.26, - "grad_norm": 2.6246974001427716, - "learning_rate": 8.657743388785676e-06, - "loss": 0.7234, + "epoch": 0.18, + "grad_norm": 1.8941959702533835, + "learning_rate": 9.447889355424858e-06, + "loss": 0.5601, "step": 2487 }, { - "epoch": 0.26, - "grad_norm": 2.5053395649894235, - "learning_rate": 8.656581245129e-06, - "loss": 0.7851, + "epoch": 0.18, + "grad_norm": 1.7373479429967977, + "learning_rate": 9.447364317386935e-06, + "loss": 0.5526, "step": 2488 }, { - "epoch": 0.26, - "grad_norm": 2.5788879956613666, - "learning_rate": 8.655418676661285e-06, - "loss": 0.6737, + "epoch": 0.18, + "grad_norm": 1.8593675232387306, + "learning_rate": 9.446839044424158e-06, + "loss": 0.6303, "step": 2489 }, { - "epoch": 0.26, - "grad_norm": 2.5695918451833752, - "learning_rate": 8.654255683517595e-06, - "loss": 0.66, + "epoch": 0.18, + "grad_norm": 0.9787391183006106, + "learning_rate": 9.446313536564278e-06, + "loss": 0.4503, "step": 2490 }, { - "epoch": 0.26, - "grad_norm": 2.2259903673736456, - "learning_rate": 8.653092265833044e-06, - "loss": 0.7476, + "epoch": 0.18, + "grad_norm": 1.7460381865849395, + "learning_rate": 9.445787793835048e-06, + "loss": 0.5517, "step": 2491 }, { - "epoch": 0.26, - "grad_norm": 3.0361177334211376, - "learning_rate": 8.651928423742793e-06, - "loss": 0.6448, + "epoch": 0.18, + "grad_norm": 1.796430503408903, + "learning_rate": 9.445261816264243e-06, + "loss": 0.5405, "step": 2492 }, { - "epoch": 0.26, - "grad_norm": 2.2871956578155253, - "learning_rate": 8.650764157382054e-06, - "loss": 0.6647, + "epoch": 0.18, + "grad_norm": 1.6741002548959194, + "learning_rate": 9.444735603879646e-06, + "loss": 0.5813, "step": 2493 }, { - "epoch": 0.26, - "grad_norm": 4.278582756268501, - "learning_rate": 8.64959946688609e-06, - "loss": 0.663, + "epoch": 0.18, + "grad_norm": 2.065248524577058, + "learning_rate": 9.444209156709054e-06, + "loss": 0.6159, "step": 2494 }, { - "epoch": 0.26, - "grad_norm": 2.5143339131874147, - "learning_rate": 8.648434352390209e-06, - "loss": 0.6837, + "epoch": 0.18, + "grad_norm": 1.699939509821089, + "learning_rate": 9.443682474780276e-06, + "loss": 0.5873, "step": 2495 }, { - "epoch": 0.26, - "grad_norm": 2.4594476956792586, - "learning_rate": 8.64726881402977e-06, - "loss": 0.6644, + "epoch": 0.18, + "grad_norm": 1.892619412511557, + "learning_rate": 9.44315555812113e-06, + "loss": 0.5854, "step": 2496 }, { - "epoch": 0.26, - "grad_norm": 3.1170695576353, - "learning_rate": 8.646102851940184e-06, - "loss": 0.766, + "epoch": 0.18, + "grad_norm": 1.7561164698563279, + "learning_rate": 9.442628406759453e-06, + "loss": 0.5985, "step": 2497 }, { - "epoch": 0.26, - "grad_norm": 2.2169468874210065, - "learning_rate": 8.64493646625691e-06, - "loss": 0.664, + "epoch": 0.18, + "grad_norm": 2.0257378001467203, + "learning_rate": 9.442101020723091e-06, + "loss": 0.5658, "step": 2498 }, { - "epoch": 0.26, - "grad_norm": 3.0516103481287034, - "learning_rate": 8.643769657115452e-06, - "loss": 0.6429, + "epoch": 0.18, + "grad_norm": 1.9962020537982097, + "learning_rate": 9.4415734000399e-06, + "loss": 0.5334, "step": 2499 }, { - "epoch": 0.26, - "grad_norm": 3.027841909715622, - "learning_rate": 8.642602424651369e-06, - "loss": 0.7947, + "epoch": 0.18, + "grad_norm": 1.8605045222239138, + "learning_rate": 9.441045544737754e-06, + "loss": 0.607, "step": 2500 }, { - "epoch": 0.26, - "grad_norm": 2.292377856177173, - "learning_rate": 8.641434769000267e-06, - "loss": 0.6661, + "epoch": 0.18, + "grad_norm": 1.6947285427500238, + "learning_rate": 9.440517454844533e-06, + "loss": 0.5146, "step": 2501 }, { - "epoch": 0.26, - "grad_norm": 2.591736810360195, - "learning_rate": 8.640266690297797e-06, - "loss": 0.5915, + "epoch": 0.18, + "grad_norm": 1.5738822211526065, + "learning_rate": 9.439989130388131e-06, + "loss": 0.5358, "step": 2502 }, { - "epoch": 0.26, - "grad_norm": 2.751465380246021, - "learning_rate": 8.639098188679668e-06, - "loss": 0.6897, + "epoch": 0.18, + "grad_norm": 1.5443720923897075, + "learning_rate": 9.439460571396462e-06, + "loss": 0.6456, "step": 2503 }, { - "epoch": 0.26, - "grad_norm": 2.0621395224891077, - "learning_rate": 8.637929264281632e-06, - "loss": 0.6687, + "epoch": 0.18, + "grad_norm": 1.5259712021550462, + "learning_rate": 9.43893177789744e-06, + "loss": 0.5691, "step": 2504 }, { - "epoch": 0.26, - "grad_norm": 1.16826930507742, - "learning_rate": 8.63675991723949e-06, - "loss": 0.6106, + "epoch": 0.18, + "grad_norm": 2.2429947446680636, + "learning_rate": 9.438402749919002e-06, + "loss": 0.6197, "step": 2505 }, { - "epoch": 0.26, - "grad_norm": 3.7332955065147577, - "learning_rate": 8.635590147689092e-06, - "loss": 0.7138, + "epoch": 0.18, + "grad_norm": 3.0806867086018674, + "learning_rate": 9.43787348748909e-06, + "loss": 0.5443, "step": 2506 }, { - "epoch": 0.26, - "grad_norm": 2.7397582619681597, - "learning_rate": 8.634419955766342e-06, - "loss": 0.6154, + "epoch": 0.18, + "grad_norm": 1.7401220095691765, + "learning_rate": 9.437343990635663e-06, + "loss": 0.5429, "step": 2507 }, { - "epoch": 0.26, - "grad_norm": 2.6765754635699213, - "learning_rate": 8.633249341607186e-06, - "loss": 0.6402, + "epoch": 0.18, + "grad_norm": 2.330594642721548, + "learning_rate": 9.436814259386694e-06, + "loss": 0.6389, "step": 2508 }, { - "epoch": 0.26, - "grad_norm": 1.147939650717752, - "learning_rate": 8.632078305347623e-06, - "loss": 0.6187, + "epoch": 0.18, + "grad_norm": 1.595794243548415, + "learning_rate": 9.436284293770157e-06, + "loss": 0.5665, "step": 2509 }, { - "epoch": 0.26, - "grad_norm": 2.5410600160091876, - "learning_rate": 8.630906847123704e-06, - "loss": 0.7342, + "epoch": 0.18, + "grad_norm": 1.8816628668531936, + "learning_rate": 9.435754093814053e-06, + "loss": 0.549, "step": 2510 }, { - "epoch": 0.26, - "grad_norm": 2.2309952636873485, - "learning_rate": 8.629734967071522e-06, - "loss": 0.6337, + "epoch": 0.18, + "grad_norm": 1.86407046519396, + "learning_rate": 9.435223659546389e-06, + "loss": 0.5366, "step": 2511 }, { - "epoch": 0.26, - "grad_norm": 2.221441936274778, - "learning_rate": 8.628562665327224e-06, - "loss": 0.6374, + "epoch": 0.18, + "grad_norm": 0.9612606756888484, + "learning_rate": 9.434692990995181e-06, + "loss": 0.4807, "step": 2512 }, { - "epoch": 0.26, - "grad_norm": 2.4825649248711654, - "learning_rate": 8.627389942027008e-06, - "loss": 0.7285, + "epoch": 0.18, + "grad_norm": 2.8304292812031946, + "learning_rate": 9.434162088188464e-06, + "loss": 0.6013, "step": 2513 }, { - "epoch": 0.26, - "grad_norm": 3.3071050264366133, - "learning_rate": 8.62621679730711e-06, - "loss": 0.5995, + "epoch": 0.18, + "grad_norm": 1.72785573229462, + "learning_rate": 9.433630951154278e-06, + "loss": 0.6219, "step": 2514 }, { - "epoch": 0.26, - "grad_norm": 1.9177350717101802, - "learning_rate": 8.62504323130383e-06, - "loss": 0.7131, + "epoch": 0.18, + "grad_norm": 2.0150275618271456, + "learning_rate": 9.433099579920686e-06, + "loss": 0.6811, "step": 2515 }, { - "epoch": 0.26, - "grad_norm": 2.510316012626707, - "learning_rate": 8.623869244153504e-06, - "loss": 0.7768, + "epoch": 0.18, + "grad_norm": 1.9641678805916127, + "learning_rate": 9.43256797451575e-06, + "loss": 0.5586, "step": 2516 }, { - "epoch": 0.26, - "grad_norm": 3.497041444497354, - "learning_rate": 8.622694835992525e-06, - "loss": 0.6689, + "epoch": 0.18, + "grad_norm": 1.6261932823014627, + "learning_rate": 9.432036134967552e-06, + "loss": 0.5976, "step": 2517 }, { - "epoch": 0.26, - "grad_norm": 2.564165380926844, - "learning_rate": 8.621520006957334e-06, - "loss": 0.7526, + "epoch": 0.18, + "grad_norm": 1.5952357192895406, + "learning_rate": 9.431504061304191e-06, + "loss": 0.5902, "step": 2518 }, { - "epoch": 0.27, - "grad_norm": 2.6172118910010704, - "learning_rate": 8.620344757184415e-06, - "loss": 0.6911, + "epoch": 0.18, + "grad_norm": 1.6511831482307644, + "learning_rate": 9.430971753553768e-06, + "loss": 0.586, "step": 2519 }, { - "epoch": 0.27, - "grad_norm": 2.2241203927304367, - "learning_rate": 8.619169086810308e-06, - "loss": 0.7166, + "epoch": 0.18, + "grad_norm": 1.7341545021843054, + "learning_rate": 9.430439211744403e-06, + "loss": 0.538, "step": 2520 }, { - "epoch": 0.27, - "grad_norm": 3.3008760240604422, - "learning_rate": 8.6179929959716e-06, - "loss": 0.7909, + "epoch": 0.18, + "grad_norm": 1.8534417689280291, + "learning_rate": 9.429906435904226e-06, + "loss": 0.6242, "step": 2521 }, { - "epoch": 0.27, - "grad_norm": 3.3505911458947626, - "learning_rate": 8.616816484804922e-06, - "loss": 0.7658, + "epoch": 0.18, + "grad_norm": 1.6296209594088824, + "learning_rate": 9.429373426061382e-06, + "loss": 0.6291, "step": 2522 }, { - "epoch": 0.27, - "grad_norm": 4.268553301879235, - "learning_rate": 8.615639553446961e-06, - "loss": 0.642, + "epoch": 0.18, + "grad_norm": 1.6322217607924494, + "learning_rate": 9.428840182244024e-06, + "loss": 0.562, "step": 2523 }, { - "epoch": 0.27, - "grad_norm": 2.9320366440133188, - "learning_rate": 8.614462202034449e-06, - "loss": 0.6443, + "epoch": 0.18, + "grad_norm": 2.1230564844811566, + "learning_rate": 9.428306704480322e-06, + "loss": 0.6369, "step": 2524 }, { - "epoch": 0.27, - "grad_norm": 2.5459832781821428, - "learning_rate": 8.613284430704165e-06, - "loss": 0.662, + "epoch": 0.18, + "grad_norm": 1.8410276193645816, + "learning_rate": 9.427772992798452e-06, + "loss": 0.566, "step": 2525 }, { - "epoch": 0.27, - "grad_norm": 2.4781573079223422, - "learning_rate": 8.612106239592944e-06, - "loss": 0.6541, + "epoch": 0.18, + "grad_norm": 1.6649437644322929, + "learning_rate": 9.42723904722661e-06, + "loss": 0.5851, "step": 2526 }, { - "epoch": 0.27, - "grad_norm": 1.971517376818193, - "learning_rate": 8.610927628837658e-06, - "loss": 0.6708, + "epoch": 0.18, + "grad_norm": 2.2667465662235506, + "learning_rate": 9.426704867793001e-06, + "loss": 0.6399, "step": 2527 }, { - "epoch": 0.27, - "grad_norm": 2.465844796112776, - "learning_rate": 8.60974859857524e-06, - "loss": 0.6641, + "epoch": 0.18, + "grad_norm": 3.5862872672793813, + "learning_rate": 9.42617045452584e-06, + "loss": 0.5796, "step": 2528 }, { - "epoch": 0.27, - "grad_norm": 2.3666940157342347, - "learning_rate": 8.608569148942664e-06, - "loss": 0.6648, + "epoch": 0.18, + "grad_norm": 1.9409912942537895, + "learning_rate": 9.42563580745336e-06, + "loss": 0.5914, "step": 2529 }, { - "epoch": 0.27, - "grad_norm": 3.3906416500624106, - "learning_rate": 8.607389280076956e-06, - "loss": 0.7074, + "epoch": 0.18, + "grad_norm": 1.7506537627776533, + "learning_rate": 9.425100926603799e-06, + "loss": 0.6026, "step": 2530 }, { - "epoch": 0.27, - "grad_norm": 2.55985900963281, - "learning_rate": 8.606208992115191e-06, - "loss": 0.6741, + "epoch": 0.18, + "grad_norm": 2.5094217416300593, + "learning_rate": 9.424565812005411e-06, + "loss": 0.5734, "step": 2531 }, { - "epoch": 0.27, - "grad_norm": 2.088312059952641, - "learning_rate": 8.605028285194487e-06, - "loss": 0.6705, + "epoch": 0.18, + "grad_norm": 4.44513713673995, + "learning_rate": 9.424030463686466e-06, + "loss": 0.5555, "step": 2532 }, { - "epoch": 0.27, - "grad_norm": 4.4291577802630435, - "learning_rate": 8.60384715945202e-06, - "loss": 0.7239, + "epoch": 0.18, + "grad_norm": 2.2005453614142296, + "learning_rate": 9.423494881675242e-06, + "loss": 0.6323, "step": 2533 }, { - "epoch": 0.27, - "grad_norm": 5.431296481740759, - "learning_rate": 8.602665615025006e-06, - "loss": 0.6131, + "epoch": 0.18, + "grad_norm": 1.682457195888122, + "learning_rate": 9.422959066000029e-06, + "loss": 0.5821, "step": 2534 }, { - "epoch": 0.27, - "grad_norm": 1.9635644510309531, - "learning_rate": 8.601483652050717e-06, - "loss": 0.5776, + "epoch": 0.18, + "grad_norm": 1.8711716474238234, + "learning_rate": 9.42242301668913e-06, + "loss": 0.5501, "step": 2535 }, { - "epoch": 0.27, - "grad_norm": 2.460305744619151, - "learning_rate": 8.600301270666467e-06, - "loss": 0.7602, + "epoch": 0.18, + "grad_norm": 1.5785557628600095, + "learning_rate": 9.421886733770863e-06, + "loss": 0.5581, "step": 2536 }, { - "epoch": 0.27, - "grad_norm": 1.2925216721758674, - "learning_rate": 8.599118471009622e-06, - "loss": 0.6207, + "epoch": 0.18, + "grad_norm": 1.7419325115233442, + "learning_rate": 9.421350217273555e-06, + "loss": 0.6211, "step": 2537 }, { - "epoch": 0.27, - "grad_norm": 2.4578397320289453, - "learning_rate": 8.597935253217598e-06, - "loss": 0.7284, + "epoch": 0.18, + "grad_norm": 2.283735851004028, + "learning_rate": 9.420813467225547e-06, + "loss": 0.5394, "step": 2538 }, { - "epoch": 0.27, - "grad_norm": 2.148544072195046, - "learning_rate": 8.596751617427856e-06, - "loss": 0.609, + "epoch": 0.18, + "grad_norm": 0.910097108861766, + "learning_rate": 9.420276483655192e-06, + "loss": 0.5071, "step": 2539 }, { - "epoch": 0.27, - "grad_norm": 3.5641003536407068, - "learning_rate": 8.595567563777909e-06, - "loss": 0.6304, + "epoch": 0.18, + "grad_norm": 2.0602061273946553, + "learning_rate": 9.419739266590854e-06, + "loss": 0.5499, "step": 2540 }, { - "epoch": 0.27, - "grad_norm": 2.5993299217825125, - "learning_rate": 8.594383092405317e-06, - "loss": 0.773, + "epoch": 0.18, + "grad_norm": 1.7442449169147471, + "learning_rate": 9.419201816060914e-06, + "loss": 0.5301, "step": 2541 }, { - "epoch": 0.27, - "grad_norm": 4.261057636960014, - "learning_rate": 8.593198203447685e-06, - "loss": 0.6816, + "epoch": 0.18, + "grad_norm": 1.8375513943186386, + "learning_rate": 9.41866413209376e-06, + "loss": 0.6229, "step": 2542 }, { - "epoch": 0.27, - "grad_norm": 2.7270686627916243, - "learning_rate": 8.592012897042677e-06, - "loss": 0.6008, + "epoch": 0.18, + "grad_norm": 0.8715300189026357, + "learning_rate": 9.418126214717792e-06, + "loss": 0.4529, "step": 2543 }, { - "epoch": 0.27, - "grad_norm": 2.593307670860837, - "learning_rate": 8.59082717332799e-06, - "loss": 0.7299, + "epoch": 0.18, + "grad_norm": 1.999479686717617, + "learning_rate": 9.417588063961428e-06, + "loss": 0.61, "step": 2544 }, { - "epoch": 0.27, - "grad_norm": 2.701277864197115, - "learning_rate": 8.589641032441384e-06, - "loss": 0.6848, + "epoch": 0.18, + "grad_norm": 1.8746662741308286, + "learning_rate": 9.417049679853093e-06, + "loss": 0.663, "step": 2545 }, { - "epoch": 0.27, - "grad_norm": 2.085418911458651, - "learning_rate": 8.588454474520657e-06, - "loss": 0.6909, + "epoch": 0.18, + "grad_norm": 2.1810014271872875, + "learning_rate": 9.416511062421228e-06, + "loss": 0.5506, "step": 2546 }, { - "epoch": 0.27, - "grad_norm": 2.8232006350273355, - "learning_rate": 8.587267499703667e-06, - "loss": 0.6507, + "epoch": 0.18, + "grad_norm": 1.7806649827539918, + "learning_rate": 9.415972211694282e-06, + "loss": 0.5191, "step": 2547 }, { - "epoch": 0.27, - "grad_norm": 2.4387883280629845, - "learning_rate": 8.586080108128304e-06, - "loss": 0.7263, + "epoch": 0.18, + "grad_norm": 1.630835975169748, + "learning_rate": 9.415433127700722e-06, + "loss": 0.6108, "step": 2548 }, { - "epoch": 0.27, - "grad_norm": 3.2908278171042737, - "learning_rate": 8.584892299932523e-06, - "loss": 0.6689, + "epoch": 0.18, + "grad_norm": 1.7343846734103776, + "learning_rate": 9.414893810469021e-06, + "loss": 0.5987, "step": 2549 }, { - "epoch": 0.27, - "grad_norm": 2.573968919899555, - "learning_rate": 8.583704075254315e-06, - "loss": 0.7485, + "epoch": 0.18, + "grad_norm": 1.9192719975727983, + "learning_rate": 9.41435426002767e-06, + "loss": 0.5431, "step": 2550 }, { - "epoch": 0.27, - "grad_norm": 1.9765316518612366, - "learning_rate": 8.582515434231729e-06, - "loss": 0.6095, + "epoch": 0.18, + "grad_norm": 1.596266231846583, + "learning_rate": 9.413814476405168e-06, + "loss": 0.5753, "step": 2551 }, { - "epoch": 0.27, - "grad_norm": 1.1051643768949975, - "learning_rate": 8.581326377002857e-06, - "loss": 0.6412, + "epoch": 0.18, + "grad_norm": 2.244285792373908, + "learning_rate": 9.41327445963003e-06, + "loss": 0.5607, "step": 2552 }, { - "epoch": 0.27, - "grad_norm": 2.0858357891432138, - "learning_rate": 8.580136903705838e-06, - "loss": 0.7277, + "epoch": 0.18, + "grad_norm": 1.807663120696747, + "learning_rate": 9.412734209730782e-06, + "loss": 0.5663, "step": 2553 }, { - "epoch": 0.27, - "grad_norm": 2.3915626172098765, - "learning_rate": 8.578947014478861e-06, - "loss": 0.6705, + "epoch": 0.18, + "grad_norm": 2.6825627736488413, + "learning_rate": 9.41219372673596e-06, + "loss": 0.5886, "step": 2554 }, { - "epoch": 0.27, - "grad_norm": 2.234544105188988, - "learning_rate": 8.577756709460167e-06, - "loss": 0.6751, + "epoch": 0.18, + "grad_norm": 1.7042636129641382, + "learning_rate": 9.411653010674114e-06, + "loss": 0.5531, "step": 2555 }, { - "epoch": 0.27, - "grad_norm": 2.99895822558418, - "learning_rate": 8.576565988788042e-06, - "loss": 0.6858, + "epoch": 0.18, + "grad_norm": 1.6893844849539452, + "learning_rate": 9.411112061573808e-06, + "loss": 0.5929, "step": 2556 }, { - "epoch": 0.27, - "grad_norm": 2.176379192047237, - "learning_rate": 8.575374852600816e-06, - "loss": 0.7022, + "epoch": 0.18, + "grad_norm": 2.03475529720633, + "learning_rate": 9.410570879463617e-06, + "loss": 0.6023, "step": 2557 }, { - "epoch": 0.27, - "grad_norm": 2.882909865354107, - "learning_rate": 8.574183301036877e-06, - "loss": 0.7089, + "epoch": 0.18, + "grad_norm": 1.5236866802479025, + "learning_rate": 9.410029464372126e-06, + "loss": 0.6155, "step": 2558 }, { - "epoch": 0.27, - "grad_norm": 2.6446090141728487, - "learning_rate": 8.572991334234654e-06, - "loss": 0.7491, + "epoch": 0.18, + "grad_norm": 1.998713624508661, + "learning_rate": 9.409487816327935e-06, + "loss": 0.5795, "step": 2559 }, { - "epoch": 0.27, - "grad_norm": 2.631308560093212, - "learning_rate": 8.571798952332625e-06, - "loss": 0.7464, + "epoch": 0.18, + "grad_norm": 1.7674547717004658, + "learning_rate": 9.408945935359656e-06, + "loss": 0.5942, "step": 2560 }, { - "epoch": 0.27, - "grad_norm": 2.3075799632747174, - "learning_rate": 8.57060615546932e-06, - "loss": 0.6408, + "epoch": 0.18, + "grad_norm": 0.8598829338566049, + "learning_rate": 9.408403821495915e-06, + "loss": 0.4568, "step": 2561 }, { - "epoch": 0.27, - "grad_norm": 3.280083800016366, - "learning_rate": 8.569412943783313e-06, - "loss": 0.6251, + "epoch": 0.18, + "grad_norm": 1.6693526711561928, + "learning_rate": 9.407861474765343e-06, + "loss": 0.5867, "step": 2562 }, { - "epoch": 0.27, - "grad_norm": 2.37287864006488, - "learning_rate": 8.56821931741323e-06, - "loss": 0.609, + "epoch": 0.18, + "grad_norm": 1.6207470744890085, + "learning_rate": 9.407318895196596e-06, + "loss": 0.6346, "step": 2563 }, { - "epoch": 0.27, - "grad_norm": 2.5322472513374734, - "learning_rate": 8.567025276497739e-06, - "loss": 0.7268, + "epoch": 0.18, + "grad_norm": 1.8933049005756721, + "learning_rate": 9.406776082818328e-06, + "loss": 0.5791, "step": 2564 }, { - "epoch": 0.27, - "grad_norm": 2.4974087371265776, - "learning_rate": 8.565830821175563e-06, - "loss": 0.7007, + "epoch": 0.18, + "grad_norm": 3.8190919205595097, + "learning_rate": 9.406233037659217e-06, + "loss": 0.5319, "step": 2565 }, { - "epoch": 0.27, - "grad_norm": 2.042712640061327, - "learning_rate": 8.56463595158547e-06, - "loss": 0.6754, + "epoch": 0.18, + "grad_norm": 1.6607986950528482, + "learning_rate": 9.405689759747946e-06, + "loss": 0.5233, "step": 2566 }, { - "epoch": 0.27, - "grad_norm": 3.414866039042617, - "learning_rate": 8.563440667866278e-06, - "loss": 0.5793, + "epoch": 0.18, + "grad_norm": 2.1439716863689484, + "learning_rate": 9.405146249113213e-06, + "loss": 0.6099, "step": 2567 }, { - "epoch": 0.27, - "grad_norm": 1.186808534700377, - "learning_rate": 8.56224497015685e-06, - "loss": 0.6083, + "epoch": 0.18, + "grad_norm": 1.6717084907491306, + "learning_rate": 9.404602505783729e-06, + "loss": 0.5271, "step": 2568 }, { - "epoch": 0.27, - "grad_norm": 2.7550982917448628, - "learning_rate": 8.561048858596097e-06, - "loss": 0.6927, + "epoch": 0.18, + "grad_norm": 1.9005525310471492, + "learning_rate": 9.404058529788214e-06, + "loss": 0.6003, "step": 2569 }, { - "epoch": 0.27, - "grad_norm": 3.307539577711527, - "learning_rate": 8.559852333322982e-06, - "loss": 0.7618, + "epoch": 0.18, + "grad_norm": 0.8465762075878627, + "learning_rate": 9.403514321155407e-06, + "loss": 0.445, "step": 2570 }, { - "epoch": 0.27, - "grad_norm": 2.6597721789148605, - "learning_rate": 8.558655394476513e-06, - "loss": 0.7079, + "epoch": 0.18, + "grad_norm": 1.7114607612251587, + "learning_rate": 9.402969879914051e-06, + "loss": 0.5756, "step": 2571 }, { - "epoch": 0.27, - "grad_norm": 2.345267249284243, - "learning_rate": 8.557458042195748e-06, - "loss": 0.723, + "epoch": 0.18, + "grad_norm": 1.5336073288816168, + "learning_rate": 9.402425206092906e-06, + "loss": 0.5972, "step": 2572 }, { - "epoch": 0.27, - "grad_norm": 2.194152292832171, - "learning_rate": 8.556260276619792e-06, - "loss": 0.7222, + "epoch": 0.18, + "grad_norm": 2.3566428881877615, + "learning_rate": 9.401880299720747e-06, + "loss": 0.5831, "step": 2573 }, { - "epoch": 0.27, - "grad_norm": 2.8628566104318725, - "learning_rate": 8.555062097887796e-06, - "loss": 0.6131, + "epoch": 0.18, + "grad_norm": 1.5540333849491075, + "learning_rate": 9.401335160826352e-06, + "loss": 0.5545, "step": 2574 }, { - "epoch": 0.27, - "grad_norm": 2.2175177567930087, - "learning_rate": 8.553863506138962e-06, - "loss": 0.6847, + "epoch": 0.18, + "grad_norm": 1.7797010533376645, + "learning_rate": 9.400789789438523e-06, + "loss": 0.5342, "step": 2575 }, { - "epoch": 0.27, - "grad_norm": 5.19010534016988, - "learning_rate": 8.55266450151254e-06, - "loss": 0.6466, + "epoch": 0.18, + "grad_norm": 1.626589517172223, + "learning_rate": 9.400244185586063e-06, + "loss": 0.5823, "step": 2576 }, { - "epoch": 0.27, - "grad_norm": 3.030066766241923, - "learning_rate": 8.551465084147826e-06, - "loss": 0.5981, + "epoch": 0.18, + "grad_norm": 2.0485918882118925, + "learning_rate": 9.399698349297794e-06, + "loss": 0.5319, "step": 2577 }, { - "epoch": 0.27, - "grad_norm": 2.5819351901307446, - "learning_rate": 8.550265254184163e-06, - "loss": 0.6906, + "epoch": 0.18, + "grad_norm": 0.837404319138409, + "learning_rate": 9.399152280602552e-06, + "loss": 0.4748, "step": 2578 }, { - "epoch": 0.27, - "grad_norm": 2.837791982234303, - "learning_rate": 8.549065011760948e-06, - "loss": 0.6177, + "epoch": 0.18, + "grad_norm": 1.76707225465034, + "learning_rate": 9.39860597952918e-06, + "loss": 0.582, "step": 2579 }, { - "epoch": 0.27, - "grad_norm": 4.767739222282302, - "learning_rate": 8.547864357017618e-06, - "loss": 0.6715, + "epoch": 0.18, + "grad_norm": 1.873390880232523, + "learning_rate": 9.398059446106536e-06, + "loss": 0.5326, "step": 2580 }, { - "epoch": 0.27, - "grad_norm": 2.6955870932338923, - "learning_rate": 8.546663290093663e-06, - "loss": 0.6986, + "epoch": 0.18, + "grad_norm": 1.592886485456597, + "learning_rate": 9.39751268036349e-06, + "loss": 0.5206, "step": 2581 }, { - "epoch": 0.27, - "grad_norm": 2.542337333218461, - "learning_rate": 8.545461811128618e-06, - "loss": 0.7228, + "epoch": 0.18, + "grad_norm": 4.968688377926814, + "learning_rate": 9.396965682328921e-06, + "loss": 0.5266, "step": 2582 }, { - "epoch": 0.27, - "grad_norm": 2.345404508684496, - "learning_rate": 8.54425992026207e-06, - "loss": 0.6988, + "epoch": 0.18, + "grad_norm": 1.6004959451998464, + "learning_rate": 9.396418452031727e-06, + "loss": 0.491, "step": 2583 }, { - "epoch": 0.27, - "grad_norm": 3.046812475477053, - "learning_rate": 8.54305761763365e-06, - "loss": 0.6544, + "epoch": 0.18, + "grad_norm": 8.005602093008575, + "learning_rate": 9.395870989500813e-06, + "loss": 0.5692, "step": 2584 }, { - "epoch": 0.27, - "grad_norm": 2.0768028881923986, - "learning_rate": 8.541854903383038e-06, - "loss": 0.6944, + "epoch": 0.18, + "grad_norm": 2.7446158299839674, + "learning_rate": 9.395323294765098e-06, + "loss": 0.5836, "step": 2585 }, { - "epoch": 0.27, - "grad_norm": 2.4296065875213158, - "learning_rate": 8.54065177764996e-06, - "loss": 0.6207, + "epoch": 0.18, + "grad_norm": 1.761761231406756, + "learning_rate": 9.394775367853514e-06, + "loss": 0.6417, "step": 2586 }, { - "epoch": 0.27, - "grad_norm": 2.333406207555947, - "learning_rate": 8.539448240574196e-06, - "loss": 0.7403, + "epoch": 0.18, + "grad_norm": 1.4918757187191198, + "learning_rate": 9.394227208795003e-06, + "loss": 0.5622, "step": 2587 }, { - "epoch": 0.27, - "grad_norm": 3.2395561665415036, - "learning_rate": 8.538244292295565e-06, - "loss": 0.6861, + "epoch": 0.18, + "grad_norm": 4.1218975747136035, + "learning_rate": 9.393678817618521e-06, + "loss": 0.5997, "step": 2588 }, { - "epoch": 0.27, - "grad_norm": 1.1745677467987092, - "learning_rate": 8.537039932953941e-06, - "loss": 0.6158, + "epoch": 0.18, + "grad_norm": 1.5175669631492863, + "learning_rate": 9.393130194353037e-06, + "loss": 0.5458, "step": 2589 }, { - "epoch": 0.27, - "grad_norm": 2.7633198958097256, - "learning_rate": 8.535835162689243e-06, - "loss": 0.6364, + "epoch": 0.18, + "grad_norm": 1.8118477116796343, + "learning_rate": 9.39258133902753e-06, + "loss": 0.5306, "step": 2590 }, { - "epoch": 0.27, - "grad_norm": 2.5596406892643273, - "learning_rate": 8.534629981641435e-06, - "loss": 0.6835, + "epoch": 0.18, + "grad_norm": 2.2659503338473064, + "learning_rate": 9.39203225167099e-06, + "loss": 0.5671, "step": 2591 }, { - "epoch": 0.27, - "grad_norm": 2.182600890353231, - "learning_rate": 8.533424389950534e-06, - "loss": 0.574, + "epoch": 0.18, + "grad_norm": 2.0327881960757046, + "learning_rate": 9.391482932312428e-06, + "loss": 0.5995, "step": 2592 }, { - "epoch": 0.27, - "grad_norm": 2.6915694598896676, - "learning_rate": 8.532218387756603e-06, - "loss": 0.6927, + "epoch": 0.18, + "grad_norm": 2.1207541964283663, + "learning_rate": 9.390933380980856e-06, + "loss": 0.5097, "step": 2593 }, { - "epoch": 0.27, - "grad_norm": 3.6283644209612125, - "learning_rate": 8.531011975199747e-06, - "loss": 0.6385, + "epoch": 0.18, + "grad_norm": 1.5349126682520167, + "learning_rate": 9.390383597705302e-06, + "loss": 0.5493, "step": 2594 }, { - "epoch": 0.27, - "grad_norm": 2.9155113489734292, - "learning_rate": 8.52980515242013e-06, - "loss": 0.6983, + "epoch": 0.18, + "grad_norm": 1.285321056761365, + "learning_rate": 9.389833582514812e-06, + "loss": 0.5193, "step": 2595 }, { - "epoch": 0.27, - "grad_norm": 8.980349217997206, - "learning_rate": 8.528597919557953e-06, - "loss": 0.7198, + "epoch": 0.18, + "grad_norm": 0.8822277018659853, + "learning_rate": 9.389283335438437e-06, + "loss": 0.4624, "step": 2596 }, { - "epoch": 0.27, - "grad_norm": 2.810695167253675, - "learning_rate": 8.52739027675347e-06, - "loss": 0.6109, + "epoch": 0.18, + "grad_norm": 2.3202671798706858, + "learning_rate": 9.388732856505243e-06, + "loss": 0.5745, "step": 2597 }, { - "epoch": 0.27, - "grad_norm": 2.9757821780269134, - "learning_rate": 8.526182224146982e-06, - "loss": 0.6589, + "epoch": 0.18, + "grad_norm": 2.1142874650036854, + "learning_rate": 9.388182145744309e-06, + "loss": 0.5982, "step": 2598 }, { - "epoch": 0.27, - "grad_norm": 3.5030227005870316, - "learning_rate": 8.524973761878834e-06, - "loss": 0.7574, + "epoch": 0.18, + "grad_norm": 1.5512397191182885, + "learning_rate": 9.387631203184725e-06, + "loss": 0.5311, "step": 2599 }, { - "epoch": 0.27, - "grad_norm": 1.9534355234534457, - "learning_rate": 8.523764890089425e-06, - "loss": 0.6311, + "epoch": 0.18, + "grad_norm": 2.3109479027975364, + "learning_rate": 9.38708002885559e-06, + "loss": 0.5982, "step": 2600 }, { - "epoch": 0.27, - "grad_norm": 2.2968695529272085, - "learning_rate": 8.522555608919198e-06, - "loss": 0.7159, + "epoch": 0.18, + "grad_norm": 1.85355755281865, + "learning_rate": 9.386528622786027e-06, + "loss": 0.5247, "step": 2601 }, { - "epoch": 0.27, - "grad_norm": 2.5862520567000935, - "learning_rate": 8.521345918508644e-06, - "loss": 0.6494, + "epoch": 0.18, + "grad_norm": 1.55967786379368, + "learning_rate": 9.385976985005157e-06, + "loss": 0.601, "step": 2602 }, { - "epoch": 0.27, - "grad_norm": 2.2346246576533058, - "learning_rate": 8.520135818998299e-06, - "loss": 0.6798, + "epoch": 0.18, + "grad_norm": 1.704242366348073, + "learning_rate": 9.385425115542121e-06, + "loss": 0.6231, "step": 2603 }, { - "epoch": 0.27, - "grad_norm": 2.3638719754838653, - "learning_rate": 8.518925310528749e-06, - "loss": 0.728, + "epoch": 0.18, + "grad_norm": 2.4441816375564893, + "learning_rate": 9.38487301442607e-06, + "loss": 0.659, "step": 2604 }, { - "epoch": 0.27, - "grad_norm": 3.03918196825084, - "learning_rate": 8.51771439324063e-06, - "loss": 0.7459, + "epoch": 0.18, + "grad_norm": 2.3802781051591566, + "learning_rate": 9.38432068168617e-06, + "loss": 0.5739, "step": 2605 }, { - "epoch": 0.27, - "grad_norm": 2.3225257145217433, - "learning_rate": 8.516503067274622e-06, - "loss": 0.7274, + "epoch": 0.18, + "grad_norm": 1.776515011177324, + "learning_rate": 9.383768117351592e-06, + "loss": 0.5461, "step": 2606 }, { - "epoch": 0.27, - "grad_norm": 2.186434199840289, - "learning_rate": 8.515291332771452e-06, - "loss": 0.6888, + "epoch": 0.18, + "grad_norm": 1.8232847215069217, + "learning_rate": 9.38321532145153e-06, + "loss": 0.5502, "step": 2607 }, { - "epoch": 0.27, - "grad_norm": 1.1134392839374618, - "learning_rate": 8.514079189871898e-06, - "loss": 0.6312, + "epoch": 0.19, + "grad_norm": 1.7270063260291788, + "learning_rate": 9.382662294015184e-06, + "loss": 0.5863, "step": 2608 }, { - "epoch": 0.27, - "grad_norm": 2.563735931453355, - "learning_rate": 8.51286663871678e-06, - "loss": 0.7038, + "epoch": 0.19, + "grad_norm": 1.6418939985880066, + "learning_rate": 9.382109035071764e-06, + "loss": 0.5677, "step": 2609 }, { - "epoch": 0.27, - "grad_norm": 7.940504117749469, - "learning_rate": 8.511653679446972e-06, - "loss": 0.7079, + "epoch": 0.19, + "grad_norm": 1.6395678368626774, + "learning_rate": 9.381555544650497e-06, + "loss": 0.4974, "step": 2610 }, { - "epoch": 0.27, - "grad_norm": 3.2533130654227143, - "learning_rate": 8.51044031220339e-06, - "loss": 0.6543, + "epoch": 0.19, + "grad_norm": 1.7078904065419123, + "learning_rate": 9.381001822780617e-06, + "loss": 0.6086, "step": 2611 }, { - "epoch": 0.27, - "grad_norm": 3.8589646098695116, - "learning_rate": 8.509226537127e-06, - "loss": 0.6759, + "epoch": 0.19, + "grad_norm": 1.7110560302002797, + "learning_rate": 9.380447869491376e-06, + "loss": 0.5825, "step": 2612 }, { - "epoch": 0.27, - "grad_norm": 3.5253019336589153, - "learning_rate": 8.508012354358815e-06, - "loss": 0.5805, + "epoch": 0.19, + "grad_norm": 1.7976698618640448, + "learning_rate": 9.379893684812037e-06, + "loss": 0.5638, "step": 2613 }, { - "epoch": 0.28, - "grad_norm": 3.1367218095988973, - "learning_rate": 8.506797764039895e-06, - "loss": 0.6941, + "epoch": 0.19, + "grad_norm": 1.7076217910001894, + "learning_rate": 9.379339268771872e-06, + "loss": 0.5544, "step": 2614 }, { - "epoch": 0.28, - "grad_norm": 3.0713095113351727, - "learning_rate": 8.505582766311349e-06, - "loss": 0.6673, + "epoch": 0.19, + "grad_norm": 2.797802868739752, + "learning_rate": 9.378784621400167e-06, + "loss": 0.5949, "step": 2615 }, { - "epoch": 0.28, - "grad_norm": 2.312239589857587, - "learning_rate": 8.504367361314329e-06, - "loss": 0.6638, + "epoch": 0.19, + "grad_norm": 1.6100316016862593, + "learning_rate": 9.378229742726222e-06, + "loss": 0.5734, "step": 2616 }, { - "epoch": 0.28, - "grad_norm": 2.8300865651331475, - "learning_rate": 8.50315154919004e-06, - "loss": 0.7389, + "epoch": 0.19, + "grad_norm": 1.7462508333928493, + "learning_rate": 9.377674632779345e-06, + "loss": 0.5298, "step": 2617 }, { - "epoch": 0.28, - "grad_norm": 2.5470160475934462, - "learning_rate": 8.501935330079732e-06, - "loss": 0.7534, + "epoch": 0.19, + "grad_norm": 2.6176126953712715, + "learning_rate": 9.377119291588863e-06, + "loss": 0.5585, "step": 2618 }, { - "epoch": 0.28, - "grad_norm": 3.316557953808444, - "learning_rate": 8.5007187041247e-06, - "loss": 0.7406, + "epoch": 0.19, + "grad_norm": 1.7736125071082263, + "learning_rate": 9.376563719184106e-06, + "loss": 0.5997, "step": 2619 }, { - "epoch": 0.28, - "grad_norm": 2.146381681148758, - "learning_rate": 8.499501671466287e-06, - "loss": 0.663, + "epoch": 0.19, + "grad_norm": 1.5093666569312825, + "learning_rate": 9.376007915594425e-06, + "loss": 0.5566, "step": 2620 }, { - "epoch": 0.28, - "grad_norm": 2.7424691680667523, - "learning_rate": 8.498284232245888e-06, - "loss": 0.6684, + "epoch": 0.19, + "grad_norm": 1.7638343018482276, + "learning_rate": 9.375451880849177e-06, + "loss": 0.5626, "step": 2621 }, { - "epoch": 0.28, - "grad_norm": 2.812793415389665, - "learning_rate": 8.497066386604937e-06, - "loss": 0.7148, + "epoch": 0.19, + "grad_norm": 1.3957158352309658, + "learning_rate": 9.374895614977735e-06, + "loss": 0.5506, "step": 2622 }, { - "epoch": 0.28, - "grad_norm": 2.4179027330742247, - "learning_rate": 8.495848134684924e-06, - "loss": 0.5672, + "epoch": 0.19, + "grad_norm": 1.6339591563195888, + "learning_rate": 9.374339118009482e-06, + "loss": 0.5975, "step": 2623 }, { - "epoch": 0.28, - "grad_norm": 2.9317748456252173, - "learning_rate": 8.494629476627378e-06, - "loss": 0.6196, + "epoch": 0.19, + "grad_norm": 2.1234455541382613, + "learning_rate": 9.373782389973814e-06, + "loss": 0.5811, "step": 2624 }, { - "epoch": 0.28, - "grad_norm": 3.8552043750504303, - "learning_rate": 8.493410412573883e-06, - "loss": 0.76, + "epoch": 0.19, + "grad_norm": 2.1477408365642146, + "learning_rate": 9.373225430900142e-06, + "loss": 0.5743, "step": 2625 }, { - "epoch": 0.28, - "grad_norm": 2.1671342068280426, - "learning_rate": 8.492190942666065e-06, - "loss": 0.7228, + "epoch": 0.19, + "grad_norm": 2.0297200595260763, + "learning_rate": 9.372668240817882e-06, + "loss": 0.5178, "step": 2626 }, { - "epoch": 0.28, - "grad_norm": 3.4361650708105707, - "learning_rate": 8.490971067045596e-06, - "loss": 0.7221, + "epoch": 0.19, + "grad_norm": 1.634038271434881, + "learning_rate": 9.37211081975647e-06, + "loss": 0.5434, "step": 2627 }, { - "epoch": 0.28, - "grad_norm": 2.6358964795479687, - "learning_rate": 8.489750785854203e-06, - "loss": 0.6363, + "epoch": 0.19, + "grad_norm": 3.030124648178972, + "learning_rate": 9.37155316774535e-06, + "loss": 0.5279, "step": 2628 }, { - "epoch": 0.28, - "grad_norm": 3.1358464929580174, - "learning_rate": 8.48853009923365e-06, - "loss": 0.7216, + "epoch": 0.19, + "grad_norm": 1.8579014591634935, + "learning_rate": 9.370995284813977e-06, + "loss": 0.566, "step": 2629 }, { - "epoch": 0.28, - "grad_norm": 2.3541129682126383, - "learning_rate": 8.487309007325755e-06, - "loss": 0.5821, + "epoch": 0.19, + "grad_norm": 1.6666529706443036, + "learning_rate": 9.370437170991824e-06, + "loss": 0.6035, "step": 2630 }, { - "epoch": 0.28, - "grad_norm": 2.420390390909207, - "learning_rate": 8.48608751027238e-06, - "loss": 0.6743, + "epoch": 0.19, + "grad_norm": 1.7337221909686524, + "learning_rate": 9.36987882630837e-06, + "loss": 0.584, "step": 2631 }, { - "epoch": 0.28, - "grad_norm": 2.4451760999988363, - "learning_rate": 8.484865608215435e-06, - "loss": 0.6701, + "epoch": 0.19, + "grad_norm": 1.917474566721065, + "learning_rate": 9.369320250793108e-06, + "loss": 0.5856, "step": 2632 }, { - "epoch": 0.28, - "grad_norm": 3.4307024526167944, - "learning_rate": 8.483643301296877e-06, - "loss": 0.7317, + "epoch": 0.19, + "grad_norm": 1.7492187323521373, + "learning_rate": 9.368761444475547e-06, + "loss": 0.5627, "step": 2633 }, { - "epoch": 0.28, - "grad_norm": 2.2798188313827032, - "learning_rate": 8.482420589658712e-06, - "loss": 0.6603, + "epoch": 0.19, + "grad_norm": 1.6479695183528194, + "learning_rate": 9.368202407385202e-06, + "loss": 0.6087, "step": 2634 }, { - "epoch": 0.28, - "grad_norm": 2.7920937123015785, - "learning_rate": 8.481197473442989e-06, - "loss": 0.7351, + "epoch": 0.19, + "grad_norm": 1.4896338025488556, + "learning_rate": 9.367643139551605e-06, + "loss": 0.6354, "step": 2635 }, { - "epoch": 0.28, - "grad_norm": 3.1650536164648404, - "learning_rate": 8.479973952791805e-06, - "loss": 0.747, + "epoch": 0.19, + "grad_norm": 1.9834919904509418, + "learning_rate": 9.367083641004298e-06, + "loss": 0.5776, "step": 2636 }, { - "epoch": 0.28, - "grad_norm": 2.4408149875447256, - "learning_rate": 8.478750027847308e-06, - "loss": 0.7117, + "epoch": 0.19, + "grad_norm": 1.573210213240799, + "learning_rate": 9.366523911772833e-06, + "loss": 0.5699, "step": 2637 }, { - "epoch": 0.28, - "grad_norm": 2.5845290741492817, - "learning_rate": 8.477525698751688e-06, - "loss": 0.6305, + "epoch": 0.19, + "grad_norm": 1.6978658747317419, + "learning_rate": 9.365963951886783e-06, + "loss": 0.5684, "step": 2638 }, { - "epoch": 0.28, - "grad_norm": 2.311318523859558, - "learning_rate": 8.476300965647186e-06, - "loss": 0.6609, + "epoch": 0.19, + "grad_norm": 1.5942965702691956, + "learning_rate": 9.36540376137572e-06, + "loss": 0.5217, "step": 2639 }, { - "epoch": 0.28, - "grad_norm": 3.0266589746186376, - "learning_rate": 8.475075828676086e-06, - "loss": 0.663, + "epoch": 0.19, + "grad_norm": 1.5313049684188527, + "learning_rate": 9.36484334026924e-06, + "loss": 0.5628, "step": 2640 }, { - "epoch": 0.28, - "grad_norm": 2.8468358467655803, - "learning_rate": 8.473850287980721e-06, - "loss": 0.6316, + "epoch": 0.19, + "grad_norm": 1.7990590356169134, + "learning_rate": 9.364282688596944e-06, + "loss": 0.5973, "step": 2641 }, { - "epoch": 0.28, - "grad_norm": 3.1150563470392996, - "learning_rate": 8.472624343703473e-06, - "loss": 0.713, + "epoch": 0.19, + "grad_norm": 1.5441365433801806, + "learning_rate": 9.363721806388448e-06, + "loss": 0.5726, "step": 2642 }, { - "epoch": 0.28, - "grad_norm": 3.3018689988839918, - "learning_rate": 8.471397995986766e-06, - "loss": 0.5945, + "epoch": 0.19, + "grad_norm": 1.6013304826389212, + "learning_rate": 9.363160693673381e-06, + "loss": 0.6029, "step": 2643 }, { - "epoch": 0.28, - "grad_norm": 2.299828029802301, - "learning_rate": 8.470171244973075e-06, - "loss": 0.6362, + "epoch": 0.19, + "grad_norm": 1.8330195954941901, + "learning_rate": 9.362599350481381e-06, + "loss": 0.5375, "step": 2644 }, { - "epoch": 0.28, - "grad_norm": 2.400265553940191, - "learning_rate": 8.46894409080492e-06, - "loss": 0.7142, + "epoch": 0.19, + "grad_norm": 0.9253230096495927, + "learning_rate": 9.3620377768421e-06, + "loss": 0.4657, "step": 2645 }, { - "epoch": 0.28, - "grad_norm": 2.3965587472793275, - "learning_rate": 8.467716533624869e-06, - "loss": 0.6693, + "epoch": 0.19, + "grad_norm": 1.6955895606115392, + "learning_rate": 9.361475972785204e-06, + "loss": 0.5285, "step": 2646 }, { - "epoch": 0.28, - "grad_norm": 3.1424468064421527, - "learning_rate": 8.466488573575536e-06, - "loss": 0.6621, + "epoch": 0.19, + "grad_norm": 1.6338381961701471, + "learning_rate": 9.360913938340367e-06, + "loss": 0.6028, "step": 2647 }, { - "epoch": 0.28, - "grad_norm": 4.058208255714223, - "learning_rate": 8.465260210799579e-06, - "loss": 0.5876, + "epoch": 0.19, + "grad_norm": 2.5558080064881703, + "learning_rate": 9.360351673537281e-06, + "loss": 0.5608, "step": 2648 }, { - "epoch": 0.28, - "grad_norm": 5.051342205579871, - "learning_rate": 8.464031445439708e-06, - "loss": 0.6876, + "epoch": 0.19, + "grad_norm": 1.8888786418785593, + "learning_rate": 9.359789178405645e-06, + "loss": 0.611, "step": 2649 }, { - "epoch": 0.28, - "grad_norm": 2.435428283210832, - "learning_rate": 8.462802277638677e-06, - "loss": 0.7141, + "epoch": 0.19, + "grad_norm": 1.7465218500521171, + "learning_rate": 9.35922645297517e-06, + "loss": 0.5865, "step": 2650 }, { - "epoch": 0.28, - "grad_norm": 2.2536304400179357, - "learning_rate": 8.461572707539288e-06, - "loss": 0.7239, + "epoch": 0.19, + "grad_norm": 1.8012329216709846, + "learning_rate": 9.358663497275584e-06, + "loss": 0.6239, "step": 2651 }, { - "epoch": 0.28, - "grad_norm": 2.2033188343885404, - "learning_rate": 8.460342735284388e-06, - "loss": 0.5988, + "epoch": 0.19, + "grad_norm": 1.499749346976142, + "learning_rate": 9.35810031133662e-06, + "loss": 0.5266, "step": 2652 }, { - "epoch": 0.28, - "grad_norm": 2.6488901006220136, - "learning_rate": 8.459112361016873e-06, - "loss": 0.7123, + "epoch": 0.19, + "grad_norm": 2.633110205572291, + "learning_rate": 9.357536895188032e-06, + "loss": 0.6438, "step": 2653 }, { - "epoch": 0.28, - "grad_norm": 2.454166766453256, - "learning_rate": 8.457881584879681e-06, - "loss": 0.7274, + "epoch": 0.19, + "grad_norm": 2.0531348147205706, + "learning_rate": 9.356973248859582e-06, + "loss": 0.58, "step": 2654 }, { - "epoch": 0.28, - "grad_norm": 2.338529320134492, - "learning_rate": 8.456650407015804e-06, - "loss": 0.7723, + "epoch": 0.19, + "grad_norm": 1.471793662162461, + "learning_rate": 9.356409372381039e-06, + "loss": 0.5967, "step": 2655 }, { - "epoch": 0.28, - "grad_norm": 2.7241780151773933, - "learning_rate": 8.455418827568275e-06, - "loss": 0.619, + "epoch": 0.19, + "grad_norm": 1.750064614727594, + "learning_rate": 9.355845265782192e-06, + "loss": 0.5383, "step": 2656 }, { - "epoch": 0.28, - "grad_norm": 2.2430549835972053, - "learning_rate": 8.454186846680174e-06, - "loss": 0.6374, + "epoch": 0.19, + "grad_norm": 1.938372331552809, + "learning_rate": 9.35528092909284e-06, + "loss": 0.5725, "step": 2657 }, { - "epoch": 0.28, - "grad_norm": 3.090373805102784, - "learning_rate": 8.452954464494631e-06, - "loss": 0.7013, + "epoch": 0.19, + "grad_norm": 1.852632822863334, + "learning_rate": 9.35471636234279e-06, + "loss": 0.6131, "step": 2658 }, { - "epoch": 0.28, - "grad_norm": 2.50997857984063, - "learning_rate": 8.451721681154819e-06, - "loss": 0.6987, + "epoch": 0.19, + "grad_norm": 2.029716513808916, + "learning_rate": 9.354151565561866e-06, + "loss": 0.5218, "step": 2659 }, { - "epoch": 0.28, - "grad_norm": 2.318114958320992, - "learning_rate": 8.45048849680396e-06, - "loss": 0.7233, + "epoch": 0.19, + "grad_norm": 1.5952089424993123, + "learning_rate": 9.353586538779904e-06, + "loss": 0.5358, "step": 2660 }, { - "epoch": 0.28, - "grad_norm": 1.2346055238002462, - "learning_rate": 8.449254911585323e-06, - "loss": 0.6527, + "epoch": 0.19, + "grad_norm": 1.8091190944488702, + "learning_rate": 9.353021282026746e-06, + "loss": 0.5195, "step": 2661 }, { - "epoch": 0.28, - "grad_norm": 2.293606280499452, - "learning_rate": 8.44802092564222e-06, - "loss": 0.6759, + "epoch": 0.19, + "grad_norm": 1.7456347832019496, + "learning_rate": 9.352455795332256e-06, + "loss": 0.5861, "step": 2662 }, { - "epoch": 0.28, - "grad_norm": 1.206662259361233, - "learning_rate": 8.446786539118014e-06, - "loss": 0.6139, + "epoch": 0.19, + "grad_norm": 2.367228925100294, + "learning_rate": 9.351890078726302e-06, + "loss": 0.5339, "step": 2663 }, { - "epoch": 0.28, - "grad_norm": 12.24962776644313, - "learning_rate": 8.445551752156111e-06, - "loss": 0.6434, + "epoch": 0.19, + "grad_norm": 0.8503799719453027, + "learning_rate": 9.351324132238769e-06, + "loss": 0.4918, "step": 2664 }, { - "epoch": 0.28, - "grad_norm": 3.2212865543394447, - "learning_rate": 8.444316564899966e-06, - "loss": 0.6465, + "epoch": 0.19, + "grad_norm": 1.6050662513289877, + "learning_rate": 9.35075795589955e-06, + "loss": 0.5781, "step": 2665 }, { - "epoch": 0.28, - "grad_norm": 2.714117520697914, - "learning_rate": 8.443080977493078e-06, - "loss": 0.5998, + "epoch": 0.19, + "grad_norm": 1.5132998636556, + "learning_rate": 9.350191549738553e-06, + "loss": 0.6002, "step": 2666 }, { - "epoch": 0.28, - "grad_norm": 3.336735248734921, - "learning_rate": 8.441844990078995e-06, - "loss": 0.7573, + "epoch": 0.19, + "grad_norm": 2.087687216772786, + "learning_rate": 9.349624913785698e-06, + "loss": 0.5374, "step": 2667 }, { - "epoch": 0.28, - "grad_norm": 2.403703415234515, - "learning_rate": 8.44060860280131e-06, - "loss": 0.6043, + "epoch": 0.19, + "grad_norm": 1.532427664951365, + "learning_rate": 9.349058048070917e-06, + "loss": 0.5825, "step": 2668 }, { - "epoch": 0.28, - "grad_norm": 2.382422434010672, - "learning_rate": 8.439371815803666e-06, - "loss": 0.7587, + "epoch": 0.19, + "grad_norm": 1.8306681328865309, + "learning_rate": 9.348490952624154e-06, + "loss": 0.6313, "step": 2669 }, { - "epoch": 0.28, - "grad_norm": 3.0968077866837533, - "learning_rate": 8.438134629229746e-06, - "loss": 0.6986, + "epoch": 0.19, + "grad_norm": 1.6244002124866752, + "learning_rate": 9.347923627475364e-06, + "loss": 0.5784, "step": 2670 }, { - "epoch": 0.28, - "grad_norm": 2.43034993732329, - "learning_rate": 8.436897043223282e-06, - "loss": 0.5956, + "epoch": 0.19, + "grad_norm": 0.7325186533487434, + "learning_rate": 9.347356072654515e-06, + "loss": 0.4917, "step": 2671 }, { - "epoch": 0.28, - "grad_norm": 2.396150179364192, - "learning_rate": 8.435659057928054e-06, - "loss": 0.6747, + "epoch": 0.19, + "grad_norm": 1.5886021512734612, + "learning_rate": 9.346788288191587e-06, + "loss": 0.5674, "step": 2672 }, { - "epoch": 0.28, - "grad_norm": 2.7277764541975973, - "learning_rate": 8.434420673487888e-06, - "loss": 0.6954, + "epoch": 0.19, + "grad_norm": 2.0005704089100464, + "learning_rate": 9.34622027411657e-06, + "loss": 0.5647, "step": 2673 }, { - "epoch": 0.28, - "grad_norm": 2.217561657414955, - "learning_rate": 8.433181890046658e-06, - "loss": 0.6458, + "epoch": 0.19, + "grad_norm": 1.607453402823058, + "learning_rate": 9.345652030459476e-06, + "loss": 0.5551, "step": 2674 }, { - "epoch": 0.28, - "grad_norm": 9.236257143877165, - "learning_rate": 8.431942707748279e-06, - "loss": 0.6915, + "epoch": 0.19, + "grad_norm": 1.4893400215077714, + "learning_rate": 9.345083557250314e-06, + "loss": 0.5359, "step": 2675 }, { - "epoch": 0.28, - "grad_norm": 3.0699718693696636, - "learning_rate": 8.430703126736717e-06, - "loss": 0.6652, + "epoch": 0.19, + "grad_norm": 1.9916520981979555, + "learning_rate": 9.344514854519116e-06, + "loss": 0.5185, "step": 2676 }, { - "epoch": 0.28, - "grad_norm": 3.175699865000509, - "learning_rate": 8.429463147155984e-06, - "loss": 0.772, + "epoch": 0.19, + "grad_norm": 1.7677222673847466, + "learning_rate": 9.343945922295921e-06, + "loss": 0.5446, "step": 2677 }, { - "epoch": 0.28, - "grad_norm": 2.7545634656991043, - "learning_rate": 8.428222769150137e-06, - "loss": 0.6886, + "epoch": 0.19, + "grad_norm": 2.257592909339425, + "learning_rate": 9.343376760610784e-06, + "loss": 0.5444, "step": 2678 }, { - "epoch": 0.28, - "grad_norm": 2.448219091164623, - "learning_rate": 8.426981992863276e-06, - "loss": 0.6719, + "epoch": 0.19, + "grad_norm": 1.6956966298153642, + "learning_rate": 9.34280736949377e-06, + "loss": 0.5211, "step": 2679 }, { - "epoch": 0.28, - "grad_norm": 2.2382890240649553, - "learning_rate": 8.425740818439553e-06, - "loss": 0.7212, + "epoch": 0.19, + "grad_norm": 0.9059015333575456, + "learning_rate": 9.342237748974955e-06, + "loss": 0.4368, "step": 2680 }, { - "epoch": 0.28, - "grad_norm": 2.483770641222447, - "learning_rate": 8.424499246023168e-06, - "loss": 0.7427, + "epoch": 0.19, + "grad_norm": 1.6223718405187346, + "learning_rate": 9.341667899084428e-06, + "loss": 0.5681, "step": 2681 }, { - "epoch": 0.28, - "grad_norm": 3.0361221580953, - "learning_rate": 8.42325727575836e-06, - "loss": 0.7023, + "epoch": 0.19, + "grad_norm": 1.93930917580587, + "learning_rate": 9.341097819852291e-06, + "loss": 0.6123, "step": 2682 }, { - "epoch": 0.28, - "grad_norm": 2.1519597344947137, - "learning_rate": 8.422014907789413e-06, - "loss": 0.7392, + "epoch": 0.19, + "grad_norm": 1.9454715454201428, + "learning_rate": 9.340527511308657e-06, + "loss": 0.5691, "step": 2683 }, { - "epoch": 0.28, - "grad_norm": 2.1345686803799038, - "learning_rate": 8.420772142260667e-06, - "loss": 0.6333, + "epoch": 0.19, + "grad_norm": 1.649709076954933, + "learning_rate": 9.339956973483653e-06, + "loss": 0.4699, "step": 2684 }, { - "epoch": 0.28, - "grad_norm": 2.287284430544546, - "learning_rate": 8.419528979316505e-06, - "loss": 0.7573, + "epoch": 0.19, + "grad_norm": 1.767930572382086, + "learning_rate": 9.339386206407415e-06, + "loss": 0.6188, "step": 2685 }, { - "epoch": 0.28, - "grad_norm": 2.4381162142757145, - "learning_rate": 8.41828541910135e-06, - "loss": 0.7724, + "epoch": 0.19, + "grad_norm": 1.713824603689268, + "learning_rate": 9.338815210110094e-06, + "loss": 0.5829, "step": 2686 }, { - "epoch": 0.28, - "grad_norm": 2.8332535794197633, - "learning_rate": 8.417041461759674e-06, - "loss": 0.6979, + "epoch": 0.19, + "grad_norm": 2.122288479263122, + "learning_rate": 9.338243984621853e-06, + "loss": 0.5553, "step": 2687 }, { - "epoch": 0.28, - "grad_norm": 1.1728298056179722, - "learning_rate": 8.415797107436e-06, - "loss": 0.627, + "epoch": 0.19, + "grad_norm": 1.6391471901251369, + "learning_rate": 9.337672529972864e-06, + "loss": 0.5358, "step": 2688 }, { - "epoch": 0.28, - "grad_norm": 1.1601804909844005, - "learning_rate": 8.414552356274891e-06, - "loss": 0.6739, + "epoch": 0.19, + "grad_norm": 1.6979606147738098, + "learning_rate": 9.337100846193315e-06, + "loss": 0.6522, "step": 2689 }, { - "epoch": 0.28, - "grad_norm": 2.679136900723749, - "learning_rate": 8.413307208420963e-06, - "loss": 0.7404, + "epoch": 0.19, + "grad_norm": 1.5794117499319513, + "learning_rate": 9.336528933313401e-06, + "loss": 0.5307, "step": 2690 }, { - "epoch": 0.28, - "grad_norm": 2.5824680380348557, - "learning_rate": 8.412061664018869e-06, - "loss": 0.745, + "epoch": 0.19, + "grad_norm": 0.8278962751786917, + "learning_rate": 9.335956791363334e-06, + "loss": 0.4892, "step": 2691 }, { - "epoch": 0.28, - "grad_norm": 3.209895645078567, - "learning_rate": 8.410815723213312e-06, - "loss": 0.6478, + "epoch": 0.19, + "grad_norm": 1.768856805810727, + "learning_rate": 9.33538442037334e-06, + "loss": 0.613, "step": 2692 }, { - "epoch": 0.28, - "grad_norm": 2.1856854173402795, - "learning_rate": 8.409569386149046e-06, - "loss": 0.6822, + "epoch": 0.19, + "grad_norm": 1.990152764606004, + "learning_rate": 9.33481182037365e-06, + "loss": 0.5884, "step": 2693 }, { - "epoch": 0.28, - "grad_norm": 2.3084967091893653, - "learning_rate": 8.408322652970866e-06, - "loss": 0.7768, + "epoch": 0.19, + "grad_norm": 0.8915895709217105, + "learning_rate": 9.33423899139451e-06, + "loss": 0.4868, "step": 2694 }, { - "epoch": 0.28, - "grad_norm": 3.9063292951795425, - "learning_rate": 8.40707552382361e-06, - "loss": 0.6608, + "epoch": 0.19, + "grad_norm": 1.4354521242149685, + "learning_rate": 9.33366593346618e-06, + "loss": 0.5642, "step": 2695 }, { - "epoch": 0.28, - "grad_norm": 2.051324396490213, - "learning_rate": 8.40582799885217e-06, - "loss": 0.5923, + "epoch": 0.19, + "grad_norm": 1.3603694806533402, + "learning_rate": 9.333092646618931e-06, + "loss": 0.5362, "step": 2696 }, { - "epoch": 0.28, - "grad_norm": 2.765515717742255, - "learning_rate": 8.404580078201476e-06, - "loss": 0.7872, + "epoch": 0.19, + "grad_norm": 4.7777472960556855, + "learning_rate": 9.332519130883046e-06, + "loss": 0.5734, "step": 2697 }, { - "epoch": 0.28, - "grad_norm": 3.1789517487510066, - "learning_rate": 8.403331762016514e-06, - "loss": 0.6043, + "epoch": 0.19, + "grad_norm": 1.6561012563243063, + "learning_rate": 9.331945386288821e-06, + "loss": 0.5553, "step": 2698 }, { - "epoch": 0.28, - "grad_norm": 2.2655735243519515, - "learning_rate": 8.402083050442302e-06, - "loss": 0.5652, + "epoch": 0.19, + "grad_norm": 1.748245783496589, + "learning_rate": 9.331371412866561e-06, + "loss": 0.5269, "step": 2699 }, { - "epoch": 0.28, - "grad_norm": 2.2602508493232687, - "learning_rate": 8.400833943623919e-06, - "loss": 0.7123, + "epoch": 0.19, + "grad_norm": 1.767364031410995, + "learning_rate": 9.330797210646586e-06, + "loss": 0.5688, "step": 2700 }, { - "epoch": 0.28, - "grad_norm": 2.5454383180770535, - "learning_rate": 8.399584441706477e-06, - "loss": 0.699, + "epoch": 0.19, + "grad_norm": 1.6544923629096815, + "learning_rate": 9.330222779659231e-06, + "loss": 0.605, "step": 2701 }, { - "epoch": 0.28, - "grad_norm": 2.2564015333342606, - "learning_rate": 8.398334544835143e-06, - "loss": 0.6204, + "epoch": 0.19, + "grad_norm": 1.7671716669094415, + "learning_rate": 9.329648119934831e-06, + "loss": 0.5856, "step": 2702 }, { - "epoch": 0.28, - "grad_norm": 2.418647080107335, - "learning_rate": 8.397084253155125e-06, - "loss": 0.6295, + "epoch": 0.19, + "grad_norm": 1.4701893197752867, + "learning_rate": 9.329073231503748e-06, + "loss": 0.5307, "step": 2703 }, { - "epoch": 0.28, - "grad_norm": 3.25654659367241, - "learning_rate": 8.395833566811677e-06, - "loss": 0.692, + "epoch": 0.19, + "grad_norm": 2.4126540532962952, + "learning_rate": 9.32849811439635e-06, + "loss": 0.5199, "step": 2704 }, { - "epoch": 0.28, - "grad_norm": 2.666744974325964, - "learning_rate": 8.394582485950103e-06, - "loss": 0.6038, + "epoch": 0.19, + "grad_norm": 1.5334625929423407, + "learning_rate": 9.327922768643014e-06, + "loss": 0.5559, "step": 2705 }, { - "epoch": 0.28, - "grad_norm": 2.4348825850755493, - "learning_rate": 8.393331010715749e-06, - "loss": 0.6997, + "epoch": 0.19, + "grad_norm": 1.8837425198733049, + "learning_rate": 9.327347194274132e-06, + "loss": 0.5414, "step": 2706 }, { - "epoch": 0.28, - "grad_norm": 2.0592895390345376, - "learning_rate": 8.392079141254006e-06, - "loss": 0.6366, + "epoch": 0.19, + "grad_norm": 1.6946195327473563, + "learning_rate": 9.326771391320107e-06, + "loss": 0.5963, "step": 2707 }, { - "epoch": 0.28, - "grad_norm": 2.0843302668304746, - "learning_rate": 8.390826877710314e-06, - "loss": 0.595, + "epoch": 0.19, + "grad_norm": 1.8396022439047297, + "learning_rate": 9.326195359811358e-06, + "loss": 0.6024, "step": 2708 }, { - "epoch": 0.29, - "grad_norm": 2.693813971402732, - "learning_rate": 8.38957422023016e-06, - "loss": 0.6939, + "epoch": 0.19, + "grad_norm": 1.7533513419434477, + "learning_rate": 9.325619099778309e-06, + "loss": 0.6207, "step": 2709 }, { - "epoch": 0.29, - "grad_norm": 2.4215728777192447, - "learning_rate": 8.388321168959068e-06, - "loss": 0.7119, + "epoch": 0.19, + "grad_norm": 1.722301659606369, + "learning_rate": 9.325042611251402e-06, + "loss": 0.5849, "step": 2710 }, { - "epoch": 0.29, - "grad_norm": 2.4275301307354837, - "learning_rate": 8.387067724042618e-06, - "loss": 0.6875, + "epoch": 0.19, + "grad_norm": 1.582886891750824, + "learning_rate": 9.324465894261092e-06, + "loss": 0.5153, "step": 2711 }, { - "epoch": 0.29, - "grad_norm": 6.524758976644784, - "learning_rate": 8.38581388562643e-06, - "loss": 0.7328, + "epoch": 0.19, + "grad_norm": 1.4863747004972452, + "learning_rate": 9.323888948837837e-06, + "loss": 0.556, "step": 2712 }, { - "epoch": 0.29, - "grad_norm": 2.529001789661384, - "learning_rate": 8.384559653856174e-06, - "loss": 0.6467, + "epoch": 0.19, + "grad_norm": 1.8234970064630815, + "learning_rate": 9.323311775012117e-06, + "loss": 0.6502, "step": 2713 }, { - "epoch": 0.29, - "grad_norm": 2.339231251838686, - "learning_rate": 8.383305028877559e-06, - "loss": 0.6954, + "epoch": 0.19, + "grad_norm": 1.7080485836060524, + "learning_rate": 9.322734372814419e-06, + "loss": 0.5812, "step": 2714 }, { - "epoch": 0.29, - "grad_norm": 2.422685898720981, - "learning_rate": 8.382050010836349e-06, - "loss": 0.7583, + "epoch": 0.19, + "grad_norm": 1.5635875611522034, + "learning_rate": 9.322156742275246e-06, + "loss": 0.5415, "step": 2715 }, { - "epoch": 0.29, - "grad_norm": 5.610823185514183, - "learning_rate": 8.380794599878343e-06, - "loss": 0.67, + "epoch": 0.19, + "grad_norm": 1.5682992823464865, + "learning_rate": 9.321578883425107e-06, + "loss": 0.6157, "step": 2716 }, { - "epoch": 0.29, - "grad_norm": 3.642245922603603, - "learning_rate": 8.379538796149395e-06, - "loss": 0.6711, + "epoch": 0.19, + "grad_norm": 1.8879359364445825, + "learning_rate": 9.321000796294528e-06, + "loss": 0.5831, "step": 2717 }, { - "epoch": 0.29, - "grad_norm": 2.3135555055352635, - "learning_rate": 8.378282599795397e-06, - "loss": 0.6705, + "epoch": 0.19, + "grad_norm": 0.9007530539335525, + "learning_rate": 9.320422480914047e-06, + "loss": 0.4725, "step": 2718 }, { - "epoch": 0.29, - "grad_norm": 2.447841830239155, - "learning_rate": 8.377026010962293e-06, - "loss": 0.6548, + "epoch": 0.19, + "grad_norm": 3.8624337151019157, + "learning_rate": 9.319843937314209e-06, + "loss": 0.5509, "step": 2719 }, { - "epoch": 0.29, - "grad_norm": 2.349506243780416, - "learning_rate": 8.375769029796068e-06, - "loss": 0.6988, + "epoch": 0.19, + "grad_norm": 0.8119596034783825, + "learning_rate": 9.319265165525578e-06, + "loss": 0.474, "step": 2720 }, { - "epoch": 0.29, - "grad_norm": 3.4916280933140373, - "learning_rate": 8.374511656442756e-06, - "loss": 0.6912, + "epoch": 0.19, + "grad_norm": 1.7676986712871225, + "learning_rate": 9.318686165578723e-06, + "loss": 0.5624, "step": 2721 }, { - "epoch": 0.29, - "grad_norm": 2.008729698463561, - "learning_rate": 8.373253891048436e-06, - "loss": 0.6498, + "epoch": 0.19, + "grad_norm": 1.8794206953558208, + "learning_rate": 9.318106937504233e-06, + "loss": 0.587, "step": 2722 }, { - "epoch": 0.29, - "grad_norm": 2.504995655860519, - "learning_rate": 8.371995733759228e-06, - "loss": 0.6303, + "epoch": 0.19, + "grad_norm": 2.1736293605127073, + "learning_rate": 9.317527481332702e-06, + "loss": 0.6039, "step": 2723 }, { - "epoch": 0.29, - "grad_norm": 4.465351579775575, - "learning_rate": 8.370737184721305e-06, - "loss": 0.6931, + "epoch": 0.19, + "grad_norm": 1.625501086748244, + "learning_rate": 9.316947797094742e-06, + "loss": 0.5239, "step": 2724 }, { - "epoch": 0.29, - "grad_norm": 2.786999536883786, - "learning_rate": 8.369478244080878e-06, - "loss": 0.6444, + "epoch": 0.19, + "grad_norm": 1.7101962797037158, + "learning_rate": 9.316367884820968e-06, + "loss": 0.589, "step": 2725 }, { - "epoch": 0.29, - "grad_norm": 3.089894612238134, - "learning_rate": 8.368218911984211e-06, - "loss": 0.7627, + "epoch": 0.19, + "grad_norm": 1.5840957800654951, + "learning_rate": 9.31578774454202e-06, + "loss": 0.6113, "step": 2726 }, { - "epoch": 0.29, - "grad_norm": 2.929159775372233, - "learning_rate": 8.366959188577606e-06, - "loss": 0.6889, + "epoch": 0.19, + "grad_norm": 1.7659698660503211, + "learning_rate": 9.315207376288535e-06, + "loss": 0.588, "step": 2727 }, { - "epoch": 0.29, - "grad_norm": 5.440489695426411, - "learning_rate": 8.365699074007416e-06, - "loss": 0.6793, + "epoch": 0.19, + "grad_norm": 1.5562425734213947, + "learning_rate": 9.314626780091178e-06, + "loss": 0.5898, "step": 2728 }, { - "epoch": 0.29, - "grad_norm": 3.058496978919441, - "learning_rate": 8.364438568420034e-06, - "loss": 0.6912, + "epoch": 0.19, + "grad_norm": 1.7443772309489198, + "learning_rate": 9.314045955980613e-06, + "loss": 0.5514, "step": 2729 }, { - "epoch": 0.29, - "grad_norm": 2.4524683201064064, - "learning_rate": 8.363177671961908e-06, - "loss": 0.6447, + "epoch": 0.19, + "grad_norm": 1.693981583849735, + "learning_rate": 9.313464903987524e-06, + "loss": 0.5223, "step": 2730 }, { - "epoch": 0.29, - "grad_norm": 2.0795075403169156, - "learning_rate": 8.36191638477952e-06, - "loss": 0.6589, + "epoch": 0.19, + "grad_norm": 1.584603245358618, + "learning_rate": 9.3128836241426e-06, + "loss": 0.5646, "step": 2731 }, { - "epoch": 0.29, - "grad_norm": 1.9902950430871291, - "learning_rate": 8.360654707019406e-06, - "loss": 0.6756, + "epoch": 0.19, + "grad_norm": 1.5394962604646236, + "learning_rate": 9.312302116476552e-06, + "loss": 0.598, "step": 2732 }, { - "epoch": 0.29, - "grad_norm": 2.568567286867176, - "learning_rate": 8.359392638828142e-06, - "loss": 0.7202, + "epoch": 0.19, + "grad_norm": 1.5676138508998851, + "learning_rate": 9.311720381020093e-06, + "loss": 0.5469, "step": 2733 }, { - "epoch": 0.29, - "grad_norm": 2.7851953645089007, - "learning_rate": 8.358130180352353e-06, - "loss": 0.6707, + "epoch": 0.19, + "grad_norm": 1.5681122252900055, + "learning_rate": 9.311138417803953e-06, + "loss": 0.5413, "step": 2734 }, { - "epoch": 0.29, - "grad_norm": 2.7057531159089776, - "learning_rate": 8.356867331738706e-06, - "loss": 0.6586, + "epoch": 0.19, + "grad_norm": 1.6633756359732956, + "learning_rate": 9.310556226858874e-06, + "loss": 0.5579, "step": 2735 }, { - "epoch": 0.29, - "grad_norm": 3.2347571171882237, - "learning_rate": 8.355604093133916e-06, - "loss": 0.6705, + "epoch": 0.19, + "grad_norm": 1.7936608094995603, + "learning_rate": 9.30997380821561e-06, + "loss": 0.6586, "step": 2736 }, { - "epoch": 0.29, - "grad_norm": 2.625192631188438, - "learning_rate": 8.354340464684745e-06, - "loss": 0.7096, + "epoch": 0.19, + "grad_norm": 1.5687017952092823, + "learning_rate": 9.309391161904923e-06, + "loss": 0.5779, "step": 2737 }, { - "epoch": 0.29, - "grad_norm": 2.7137203886388295, - "learning_rate": 8.353076446537993e-06, - "loss": 0.6789, + "epoch": 0.19, + "grad_norm": 1.4469087648770864, + "learning_rate": 9.308808287957593e-06, + "loss": 0.585, "step": 2738 }, { - "epoch": 0.29, - "grad_norm": 3.1425928647546533, - "learning_rate": 8.351812038840513e-06, - "loss": 0.6174, + "epoch": 0.19, + "grad_norm": 2.1376181059123085, + "learning_rate": 9.308225186404411e-06, + "loss": 0.5995, "step": 2739 }, { - "epoch": 0.29, - "grad_norm": 3.1199663116285286, - "learning_rate": 8.3505472417392e-06, - "loss": 0.6875, + "epoch": 0.19, + "grad_norm": 1.844709613621193, + "learning_rate": 9.307641857276175e-06, + "loss": 0.6379, "step": 2740 }, { - "epoch": 0.29, - "grad_norm": 2.3558771465349433, - "learning_rate": 8.349282055380992e-06, - "loss": 0.6542, + "epoch": 0.19, + "grad_norm": 1.5398023019142713, + "learning_rate": 9.3070583006037e-06, + "loss": 0.5566, "step": 2741 }, { - "epoch": 0.29, - "grad_norm": 2.9753417802732893, - "learning_rate": 8.348016479912877e-06, - "loss": 0.6566, + "epoch": 0.19, + "grad_norm": 2.3381768289224105, + "learning_rate": 9.306474516417811e-06, + "loss": 0.6213, "step": 2742 }, { - "epoch": 0.29, - "grad_norm": 2.5685130835209935, - "learning_rate": 8.346750515481888e-06, - "loss": 0.7154, + "epoch": 0.19, + "grad_norm": 1.9972839329743721, + "learning_rate": 9.305890504749347e-06, + "loss": 0.6154, "step": 2743 }, { - "epoch": 0.29, - "grad_norm": 3.1983652825774427, - "learning_rate": 8.345484162235096e-06, - "loss": 0.6878, + "epoch": 0.19, + "grad_norm": 0.991578802275153, + "learning_rate": 9.305306265629155e-06, + "loss": 0.4661, "step": 2744 }, { - "epoch": 0.29, - "grad_norm": 8.279656071212713, - "learning_rate": 8.344217420319624e-06, - "loss": 0.6388, + "epoch": 0.19, + "grad_norm": 1.5947013747185548, + "learning_rate": 9.304721799088097e-06, + "loss": 0.623, "step": 2745 }, { - "epoch": 0.29, - "grad_norm": 2.6025787077693563, - "learning_rate": 8.342950289882641e-06, - "loss": 0.6941, + "epoch": 0.19, + "grad_norm": 2.088101787206702, + "learning_rate": 9.304137105157049e-06, + "loss": 0.524, "step": 2746 }, { - "epoch": 0.29, - "grad_norm": 2.614210374161948, - "learning_rate": 8.341682771071357e-06, - "loss": 0.6266, + "epoch": 0.19, + "grad_norm": 1.9508996860461598, + "learning_rate": 9.303552183866893e-06, + "loss": 0.5553, "step": 2747 }, { - "epoch": 0.29, - "grad_norm": 2.1048003157133306, - "learning_rate": 8.340414864033028e-06, - "loss": 0.6507, + "epoch": 0.2, + "grad_norm": 1.7034067671472835, + "learning_rate": 9.30296703524853e-06, + "loss": 0.6173, "step": 2748 }, { - "epoch": 0.29, - "grad_norm": 3.1566816578883072, - "learning_rate": 8.339146568914958e-06, - "loss": 0.7176, + "epoch": 0.2, + "grad_norm": 0.8510237740879033, + "learning_rate": 9.302381659332866e-06, + "loss": 0.4478, "step": 2749 }, { - "epoch": 0.29, - "grad_norm": 2.6545077340607643, - "learning_rate": 8.337877885864489e-06, - "loss": 0.7292, + "epoch": 0.2, + "grad_norm": 1.8232310714905635, + "learning_rate": 9.301796056150824e-06, + "loss": 0.549, "step": 2750 }, { - "epoch": 0.29, - "grad_norm": 3.9130923410553775, - "learning_rate": 8.336608815029018e-06, - "loss": 0.7339, + "epoch": 0.2, + "grad_norm": 1.671054990486113, + "learning_rate": 9.30121022573334e-06, + "loss": 0.54, "step": 2751 }, { - "epoch": 0.29, - "grad_norm": 3.184098187059293, - "learning_rate": 8.335339356555981e-06, - "loss": 0.6965, + "epoch": 0.2, + "grad_norm": 2.061084315331839, + "learning_rate": 9.300624168111357e-06, + "loss": 0.5626, "step": 2752 }, { - "epoch": 0.29, - "grad_norm": 2.14723331071444, - "learning_rate": 8.334069510592857e-06, - "loss": 0.5925, + "epoch": 0.2, + "grad_norm": 0.8902155189042619, + "learning_rate": 9.30003788331583e-06, + "loss": 0.4698, "step": 2753 }, { - "epoch": 0.29, - "grad_norm": 2.9405681252036464, - "learning_rate": 8.332799277287175e-06, - "loss": 0.6977, + "epoch": 0.2, + "grad_norm": 1.5228198516074885, + "learning_rate": 9.299451371377734e-06, + "loss": 0.5475, "step": 2754 }, { - "epoch": 0.29, - "grad_norm": 2.3880455307462882, - "learning_rate": 8.331528656786508e-06, - "loss": 0.6346, + "epoch": 0.2, + "grad_norm": 1.5752239718402736, + "learning_rate": 9.298864632328049e-06, + "loss": 0.5604, "step": 2755 }, { - "epoch": 0.29, - "grad_norm": 2.322406078061077, - "learning_rate": 8.330257649238472e-06, - "loss": 0.7288, + "epoch": 0.2, + "grad_norm": 2.0043172792116377, + "learning_rate": 9.298277666197767e-06, + "loss": 0.5861, "step": 2756 }, { - "epoch": 0.29, - "grad_norm": 2.1014011587142933, - "learning_rate": 8.328986254790729e-06, - "loss": 0.6754, + "epoch": 0.2, + "grad_norm": 0.8238068434882334, + "learning_rate": 9.29769047301789e-06, + "loss": 0.4402, "step": 2757 }, { - "epoch": 0.29, - "grad_norm": 1.3514936631244956, - "learning_rate": 8.327714473590986e-06, - "loss": 0.646, + "epoch": 0.2, + "grad_norm": 2.2185094437676742, + "learning_rate": 9.297103052819444e-06, + "loss": 0.5642, "step": 2758 }, { - "epoch": 0.29, - "grad_norm": 2.299770815204609, - "learning_rate": 8.326442305786995e-06, - "loss": 0.6682, + "epoch": 0.2, + "grad_norm": 1.9789034442139897, + "learning_rate": 9.296515405633454e-06, + "loss": 0.616, "step": 2759 }, { - "epoch": 0.29, - "grad_norm": 2.4432174028048284, - "learning_rate": 8.325169751526552e-06, - "loss": 0.7419, + "epoch": 0.2, + "grad_norm": 1.5788497012553782, + "learning_rate": 9.295927531490961e-06, + "loss": 0.5328, "step": 2760 }, { - "epoch": 0.29, - "grad_norm": 2.7511522431665765, - "learning_rate": 8.323896810957501e-06, - "loss": 0.5777, + "epoch": 0.2, + "grad_norm": 2.780561314582799, + "learning_rate": 9.295339430423018e-06, + "loss": 0.5737, "step": 2761 }, { - "epoch": 0.29, - "grad_norm": 2.7500144099538226, - "learning_rate": 8.322623484227725e-06, - "loss": 0.6688, + "epoch": 0.2, + "grad_norm": 2.077415893417806, + "learning_rate": 9.294751102460692e-06, + "loss": 0.6404, "step": 2762 }, { - "epoch": 0.29, - "grad_norm": 2.442208106975574, - "learning_rate": 8.321349771485159e-06, - "loss": 0.7288, + "epoch": 0.2, + "grad_norm": 2.0548604949755305, + "learning_rate": 9.294162547635062e-06, + "loss": 0.5284, "step": 2763 }, { - "epoch": 0.29, - "grad_norm": 2.4073668296629323, - "learning_rate": 8.320075672877776e-06, - "loss": 0.6964, + "epoch": 0.2, + "grad_norm": 0.9775033146737689, + "learning_rate": 9.293573765977215e-06, + "loss": 0.4787, "step": 2764 }, { - "epoch": 0.29, - "grad_norm": 2.973204687169632, - "learning_rate": 8.3188011885536e-06, - "loss": 0.665, + "epoch": 0.2, + "grad_norm": 2.000182574000604, + "learning_rate": 9.292984757518253e-06, + "loss": 0.5411, "step": 2765 }, { - "epoch": 0.29, - "grad_norm": 1.9341022002413333, - "learning_rate": 8.317526318660695e-06, - "loss": 0.6552, + "epoch": 0.2, + "grad_norm": 1.6998394501514196, + "learning_rate": 9.292395522289288e-06, + "loss": 0.4716, "step": 2766 }, { - "epoch": 0.29, - "grad_norm": 2.41457288085297, - "learning_rate": 8.316251063347175e-06, - "loss": 0.7097, + "epoch": 0.2, + "grad_norm": 1.7189695574857438, + "learning_rate": 9.291806060321449e-06, + "loss": 0.5608, "step": 2767 }, { - "epoch": 0.29, - "grad_norm": 2.5800526852961263, - "learning_rate": 8.314975422761187e-06, - "loss": 0.5778, + "epoch": 0.2, + "grad_norm": 2.110473683346567, + "learning_rate": 9.29121637164587e-06, + "loss": 0.5241, "step": 2768 }, { - "epoch": 0.29, - "grad_norm": 2.752826125090857, - "learning_rate": 8.313699397050941e-06, - "loss": 0.6198, + "epoch": 0.2, + "grad_norm": 0.8534249147036781, + "learning_rate": 9.290626456293701e-06, + "loss": 0.4485, "step": 2769 }, { - "epoch": 0.29, - "grad_norm": 2.3181883296463717, - "learning_rate": 8.312422986364677e-06, - "loss": 0.546, + "epoch": 0.2, + "grad_norm": 2.3891785787577096, + "learning_rate": 9.290036314296104e-06, + "loss": 0.6317, "step": 2770 }, { - "epoch": 0.29, - "grad_norm": 2.3923531457438534, - "learning_rate": 8.311146190850687e-06, - "loss": 0.7019, + "epoch": 0.2, + "grad_norm": 1.5502211596037836, + "learning_rate": 9.289445945684255e-06, + "loss": 0.5107, "step": 2771 }, { - "epoch": 0.29, - "grad_norm": 2.57435657748528, - "learning_rate": 8.309869010657303e-06, - "loss": 0.6215, + "epoch": 0.2, + "grad_norm": 2.3289119559217335, + "learning_rate": 9.288855350489334e-06, + "loss": 0.5443, "step": 2772 }, { - "epoch": 0.29, - "grad_norm": 1.9661259285016583, - "learning_rate": 8.308591445932905e-06, - "loss": 0.6052, + "epoch": 0.2, + "grad_norm": 1.9206460516814714, + "learning_rate": 9.288264528742542e-06, + "loss": 0.5944, "step": 2773 }, { - "epoch": 0.29, - "grad_norm": 2.1300824586691043, - "learning_rate": 8.307313496825918e-06, - "loss": 0.6245, + "epoch": 0.2, + "grad_norm": 2.2887620921149585, + "learning_rate": 9.287673480475086e-06, + "loss": 0.5925, "step": 2774 }, { - "epoch": 0.29, - "grad_norm": 2.7492441683809816, - "learning_rate": 8.306035163484806e-06, - "loss": 0.6852, + "epoch": 0.2, + "grad_norm": 1.6678528460446758, + "learning_rate": 9.28708220571819e-06, + "loss": 0.4989, "step": 2775 }, { - "epoch": 0.29, - "grad_norm": 2.4951222437462484, - "learning_rate": 8.30475644605809e-06, - "loss": 0.6103, + "epoch": 0.2, + "grad_norm": 1.740611404542252, + "learning_rate": 9.286490704503082e-06, + "loss": 0.5704, "step": 2776 }, { - "epoch": 0.29, - "grad_norm": 2.2323509496392493, - "learning_rate": 8.30347734469432e-06, - "loss": 0.6149, + "epoch": 0.2, + "grad_norm": 1.5596327218244386, + "learning_rate": 9.285898976861012e-06, + "loss": 0.5804, "step": 2777 }, { - "epoch": 0.29, - "grad_norm": 3.702733593456729, - "learning_rate": 8.302197859542104e-06, - "loss": 0.6772, + "epoch": 0.2, + "grad_norm": 2.0952128570318793, + "learning_rate": 9.285307022823235e-06, + "loss": 0.5894, "step": 2778 }, { - "epoch": 0.29, - "grad_norm": 2.029812775065599, - "learning_rate": 8.300917990750085e-06, - "loss": 0.669, + "epoch": 0.2, + "grad_norm": 1.791813107059716, + "learning_rate": 9.284714842421022e-06, + "loss": 0.5395, "step": 2779 }, { - "epoch": 0.29, - "grad_norm": 2.436939042536463, - "learning_rate": 8.299637738466956e-06, - "loss": 0.6703, + "epoch": 0.2, + "grad_norm": 1.444412714915466, + "learning_rate": 9.284122435685652e-06, + "loss": 0.4949, "step": 2780 }, { - "epoch": 0.29, - "grad_norm": 7.064859457627149, - "learning_rate": 8.298357102841452e-06, - "loss": 0.6941, + "epoch": 0.2, + "grad_norm": 2.2455118123722344, + "learning_rate": 9.283529802648417e-06, + "loss": 0.5367, "step": 2781 }, { - "epoch": 0.29, - "grad_norm": 2.290933554699684, - "learning_rate": 8.297076084022355e-06, - "loss": 0.585, + "epoch": 0.2, + "grad_norm": 2.7313459060112293, + "learning_rate": 9.282936943340623e-06, + "loss": 0.5786, "step": 2782 }, { - "epoch": 0.29, - "grad_norm": 2.7292159300881327, - "learning_rate": 8.29579468215849e-06, - "loss": 0.6463, + "epoch": 0.2, + "grad_norm": 1.5245273619710156, + "learning_rate": 9.28234385779359e-06, + "loss": 0.5678, "step": 2783 }, { - "epoch": 0.29, - "grad_norm": 2.4076744166382755, - "learning_rate": 8.294512897398725e-06, - "loss": 0.6891, + "epoch": 0.2, + "grad_norm": 2.1017104794512114, + "learning_rate": 9.281750546038642e-06, + "loss": 0.5955, "step": 2784 }, { - "epoch": 0.29, - "grad_norm": 2.400183386823079, - "learning_rate": 8.293230729891976e-06, - "loss": 0.6797, + "epoch": 0.2, + "grad_norm": 1.9365813084822805, + "learning_rate": 9.281157008107121e-06, + "loss": 0.586, "step": 2785 }, { - "epoch": 0.29, - "grad_norm": 2.5300566156875215, - "learning_rate": 8.2919481797872e-06, - "loss": 0.6729, + "epoch": 0.2, + "grad_norm": 1.857077446994876, + "learning_rate": 9.280563244030381e-06, + "loss": 0.6133, "step": 2786 }, { - "epoch": 0.29, - "grad_norm": 2.504139135974389, - "learning_rate": 8.2906652472334e-06, - "loss": 0.7771, + "epoch": 0.2, + "grad_norm": 1.9986495017904966, + "learning_rate": 9.279969253839785e-06, + "loss": 0.5311, "step": 2787 }, { - "epoch": 0.29, - "grad_norm": 2.4033755762522127, - "learning_rate": 8.289381932379625e-06, - "loss": 0.7255, + "epoch": 0.2, + "grad_norm": 1.7843919570753788, + "learning_rate": 9.279375037566712e-06, + "loss": 0.5904, "step": 2788 }, { - "epoch": 0.29, - "grad_norm": 2.130391692034721, - "learning_rate": 8.288098235374966e-06, - "loss": 0.6504, + "epoch": 0.2, + "grad_norm": 1.9294770715789524, + "learning_rate": 9.27878059524255e-06, + "loss": 0.5729, "step": 2789 }, { - "epoch": 0.29, - "grad_norm": 2.3119004788902524, - "learning_rate": 8.286814156368559e-06, - "loss": 0.7532, + "epoch": 0.2, + "grad_norm": 2.0134654628970674, + "learning_rate": 9.278185926898696e-06, + "loss": 0.5938, "step": 2790 }, { - "epoch": 0.29, - "grad_norm": 12.473032165918204, - "learning_rate": 8.285529695509585e-06, - "loss": 0.7055, + "epoch": 0.2, + "grad_norm": 2.515227871471421, + "learning_rate": 9.277591032566565e-06, + "loss": 0.6, "step": 2791 }, { - "epoch": 0.29, - "grad_norm": 2.874206287893155, - "learning_rate": 8.284244852947265e-06, - "loss": 0.7682, + "epoch": 0.2, + "grad_norm": 2.0360184705045654, + "learning_rate": 9.276995912277582e-06, + "loss": 0.6007, "step": 2792 }, { - "epoch": 0.29, - "grad_norm": 2.3354999888656627, - "learning_rate": 8.282959628830875e-06, - "loss": 0.7107, + "epoch": 0.2, + "grad_norm": 0.8984035466232516, + "learning_rate": 9.276400566063182e-06, + "loss": 0.4697, "step": 2793 }, { - "epoch": 0.29, - "grad_norm": 2.6099032292361617, - "learning_rate": 8.281674023309725e-06, - "loss": 0.6281, + "epoch": 0.2, + "grad_norm": 1.9833654984613605, + "learning_rate": 9.275804993954813e-06, + "loss": 0.5487, "step": 2794 }, { - "epoch": 0.29, - "grad_norm": 2.232202918075071, - "learning_rate": 8.280388036533171e-06, - "loss": 0.6399, + "epoch": 0.2, + "grad_norm": 1.8656881167730752, + "learning_rate": 9.275209195983939e-06, + "loss": 0.6146, "step": 2795 }, { - "epoch": 0.29, - "grad_norm": 2.491569330119017, - "learning_rate": 8.27910166865062e-06, - "loss": 0.626, + "epoch": 0.2, + "grad_norm": 2.037683052109617, + "learning_rate": 9.274613172182025e-06, + "loss": 0.6077, "step": 2796 }, { - "epoch": 0.29, - "grad_norm": 2.2273954138081073, - "learning_rate": 8.277814919811516e-06, - "loss": 0.6008, + "epoch": 0.2, + "grad_norm": 2.121614600583231, + "learning_rate": 9.274016922580561e-06, + "loss": 0.6099, "step": 2797 }, { - "epoch": 0.29, - "grad_norm": 2.2945544456627385, - "learning_rate": 8.276527790165349e-06, - "loss": 0.6461, + "epoch": 0.2, + "grad_norm": 1.7251638364429225, + "learning_rate": 9.273420447211041e-06, + "loss": 0.6012, "step": 2798 }, { - "epoch": 0.29, - "grad_norm": 2.277058416467495, - "learning_rate": 8.275240279861655e-06, - "loss": 0.69, + "epoch": 0.2, + "grad_norm": 1.4061280686221869, + "learning_rate": 9.272823746104972e-06, + "loss": 0.4862, "step": 2799 }, { - "epoch": 0.29, - "grad_norm": 2.2731253809981653, - "learning_rate": 8.273952389050015e-06, - "loss": 0.6764, + "epoch": 0.2, + "grad_norm": 1.3959926916935366, + "learning_rate": 9.272226819293875e-06, + "loss": 0.573, "step": 2800 }, { - "epoch": 0.29, - "grad_norm": 2.1917315114226636, - "learning_rate": 8.272664117880047e-06, - "loss": 0.6476, + "epoch": 0.2, + "grad_norm": 1.5441517903712119, + "learning_rate": 9.27162966680928e-06, + "loss": 0.5782, "step": 2801 }, { - "epoch": 0.29, - "grad_norm": 2.25326375571558, - "learning_rate": 8.271375466501424e-06, - "loss": 0.7102, + "epoch": 0.2, + "grad_norm": 1.9604951272425812, + "learning_rate": 9.271032288682732e-06, + "loss": 0.5661, "step": 2802 }, { - "epoch": 0.29, - "grad_norm": 2.302993394481045, - "learning_rate": 8.270086435063856e-06, - "loss": 0.6434, + "epoch": 0.2, + "grad_norm": 1.6611840679445224, + "learning_rate": 9.270434684945788e-06, + "loss": 0.6103, "step": 2803 }, { - "epoch": 0.3, - "grad_norm": 2.7166993208874195, - "learning_rate": 8.268797023717098e-06, - "loss": 0.6369, + "epoch": 0.2, + "grad_norm": 1.666850473547599, + "learning_rate": 9.269836855630012e-06, + "loss": 0.5854, "step": 2804 }, { - "epoch": 0.3, - "grad_norm": 2.2576333540166673, - "learning_rate": 8.267507232610952e-06, - "loss": 0.7091, + "epoch": 0.2, + "grad_norm": 1.9634289015052904, + "learning_rate": 9.269238800766984e-06, + "loss": 0.546, "step": 2805 }, { - "epoch": 0.3, - "grad_norm": 2.834052961933635, - "learning_rate": 8.26621706189526e-06, - "loss": 0.6522, + "epoch": 0.2, + "grad_norm": 2.325465273646718, + "learning_rate": 9.268640520388299e-06, + "loss": 0.61, "step": 2806 }, { - "epoch": 0.3, - "grad_norm": 2.4814364257476966, - "learning_rate": 8.264926511719912e-06, - "loss": 0.6399, + "epoch": 0.2, + "grad_norm": 1.6763124790106425, + "learning_rate": 9.268042014525556e-06, + "loss": 0.5733, "step": 2807 }, { - "epoch": 0.3, - "grad_norm": 2.8842459517526953, - "learning_rate": 8.26363558223484e-06, - "loss": 0.7116, + "epoch": 0.2, + "grad_norm": 1.871979398344286, + "learning_rate": 9.267443283210372e-06, + "loss": 0.5879, "step": 2808 }, { - "epoch": 0.3, - "grad_norm": 1.2082080106969904, - "learning_rate": 8.26234427359002e-06, - "loss": 0.6688, + "epoch": 0.2, + "grad_norm": 1.8762517597804866, + "learning_rate": 9.266844326474373e-06, + "loss": 0.6609, "step": 2809 }, { - "epoch": 0.3, - "grad_norm": 2.234644633018572, - "learning_rate": 8.261052585935471e-06, - "loss": 0.6346, + "epoch": 0.2, + "grad_norm": 1.622471112412664, + "learning_rate": 9.266245144349201e-06, + "loss": 0.6715, "step": 2810 }, { - "epoch": 0.3, - "grad_norm": 2.4322865695650178, - "learning_rate": 8.259760519421263e-06, - "loss": 0.7054, + "epoch": 0.2, + "grad_norm": 1.5812503040460713, + "learning_rate": 9.265645736866502e-06, + "loss": 0.5448, "step": 2811 }, { - "epoch": 0.3, - "grad_norm": 2.6564893401792675, - "learning_rate": 8.258468074197499e-06, - "loss": 0.6955, + "epoch": 0.2, + "grad_norm": 18.176651167269117, + "learning_rate": 9.265046104057943e-06, + "loss": 0.5652, "step": 2812 }, { - "epoch": 0.3, - "grad_norm": 2.266036275854144, - "learning_rate": 8.257175250414333e-06, - "loss": 0.7159, + "epoch": 0.2, + "grad_norm": 1.7181365579228223, + "learning_rate": 9.264446245955196e-06, + "loss": 0.641, "step": 2813 }, { - "epoch": 0.3, - "grad_norm": 2.762553186968921, - "learning_rate": 8.255882048221961e-06, - "loss": 0.7056, + "epoch": 0.2, + "grad_norm": 1.867773637121081, + "learning_rate": 9.263846162589948e-06, + "loss": 0.5624, "step": 2814 }, { - "epoch": 0.3, - "grad_norm": 4.139967136595774, - "learning_rate": 8.254588467770628e-06, - "loss": 0.707, + "epoch": 0.2, + "grad_norm": 2.093995861693024, + "learning_rate": 9.263245853993899e-06, + "loss": 0.5532, "step": 2815 }, { - "epoch": 0.3, - "grad_norm": 3.9568494330619317, - "learning_rate": 8.253294509210612e-06, - "loss": 0.6702, + "epoch": 0.2, + "grad_norm": 2.8234002771954954, + "learning_rate": 9.262645320198757e-06, + "loss": 0.5422, "step": 2816 }, { - "epoch": 0.3, - "grad_norm": 3.031085393262171, - "learning_rate": 8.252000172692244e-06, - "loss": 0.6967, + "epoch": 0.2, + "grad_norm": 1.7635189003397638, + "learning_rate": 9.262044561236246e-06, + "loss": 0.5482, "step": 2817 }, { - "epoch": 0.3, - "grad_norm": 3.3355616466171094, - "learning_rate": 8.250705458365897e-06, - "loss": 0.7047, + "epoch": 0.2, + "grad_norm": 2.962676811626448, + "learning_rate": 9.261443577138098e-06, + "loss": 0.6296, "step": 2818 }, { - "epoch": 0.3, - "grad_norm": 2.3126709857674936, - "learning_rate": 8.249410366381987e-06, - "loss": 0.6727, + "epoch": 0.2, + "grad_norm": 1.8032146205806825, + "learning_rate": 9.260842367936063e-06, + "loss": 0.5946, "step": 2819 }, { - "epoch": 0.3, - "grad_norm": 2.857054433557827, - "learning_rate": 8.248114896890975e-06, - "loss": 0.6336, + "epoch": 0.2, + "grad_norm": 1.7676538069669736, + "learning_rate": 9.260240933661894e-06, + "loss": 0.6219, "step": 2820 }, { - "epoch": 0.3, - "grad_norm": 3.0926046214596106, - "learning_rate": 8.246819050043363e-06, - "loss": 0.6691, + "epoch": 0.2, + "grad_norm": 2.133037881416358, + "learning_rate": 9.259639274347365e-06, + "loss": 0.5649, "step": 2821 }, { - "epoch": 0.3, - "grad_norm": 2.618242351375955, - "learning_rate": 8.245522825989697e-06, - "loss": 0.6733, + "epoch": 0.2, + "grad_norm": 2.2544743080111105, + "learning_rate": 9.259037390024254e-06, + "loss": 0.6794, "step": 2822 }, { - "epoch": 0.3, - "grad_norm": 2.7640385714230673, - "learning_rate": 8.244226224880574e-06, - "loss": 0.6313, + "epoch": 0.2, + "grad_norm": 1.8091130409575953, + "learning_rate": 9.258435280724359e-06, + "loss": 0.5313, "step": 2823 }, { - "epoch": 0.3, - "grad_norm": 2.5354134106997948, - "learning_rate": 8.242929246866624e-06, - "loss": 0.7069, + "epoch": 0.2, + "grad_norm": 1.4620843542914332, + "learning_rate": 9.257832946479481e-06, + "loss": 0.5196, "step": 2824 }, { - "epoch": 0.3, - "grad_norm": 2.933933284256052, - "learning_rate": 8.24163189209853e-06, - "loss": 0.749, + "epoch": 0.2, + "grad_norm": 1.8066344207198557, + "learning_rate": 9.25723038732144e-06, + "loss": 0.5854, "step": 2825 }, { - "epoch": 0.3, - "grad_norm": 2.8633014033698627, - "learning_rate": 8.240334160727013e-06, - "loss": 0.6782, + "epoch": 0.2, + "grad_norm": 1.8705484742510068, + "learning_rate": 9.256627603282065e-06, + "loss": 0.5888, "step": 2826 }, { - "epoch": 0.3, - "grad_norm": 2.7941442636390543, - "learning_rate": 8.23903605290284e-06, - "loss": 0.6433, + "epoch": 0.2, + "grad_norm": 1.777180208071855, + "learning_rate": 9.256024594393195e-06, + "loss": 0.6505, "step": 2827 }, { - "epoch": 0.3, - "grad_norm": 2.3734190039511978, - "learning_rate": 8.23773756877682e-06, - "loss": 0.6341, + "epoch": 0.2, + "grad_norm": 1.6306733296660285, + "learning_rate": 9.255421360686685e-06, + "loss": 0.5417, "step": 2828 }, { - "epoch": 0.3, - "grad_norm": 4.306290887870174, - "learning_rate": 8.236438708499811e-06, - "loss": 0.7399, + "epoch": 0.2, + "grad_norm": 1.6573642234594272, + "learning_rate": 9.2548179021944e-06, + "loss": 0.5872, "step": 2829 }, { - "epoch": 0.3, - "grad_norm": 3.327313393460941, - "learning_rate": 8.235139472222708e-06, - "loss": 0.6106, + "epoch": 0.2, + "grad_norm": 1.865915024745197, + "learning_rate": 9.254214218948217e-06, + "loss": 0.6183, "step": 2830 }, { - "epoch": 0.3, - "grad_norm": 2.842091527633625, - "learning_rate": 8.233839860096453e-06, - "loss": 0.6594, + "epoch": 0.2, + "grad_norm": 0.8541452353607615, + "learning_rate": 9.253610310980023e-06, + "loss": 0.5014, "step": 2831 }, { - "epoch": 0.3, - "grad_norm": 2.427751709970699, - "learning_rate": 8.23253987227203e-06, - "loss": 0.757, + "epoch": 0.2, + "grad_norm": 0.8707580861874467, + "learning_rate": 9.253006178321719e-06, + "loss": 0.4758, "step": 2832 }, { - "epoch": 0.3, - "grad_norm": 4.827636901802302, - "learning_rate": 8.23123950890047e-06, - "loss": 0.7158, + "epoch": 0.2, + "grad_norm": 1.772135521118272, + "learning_rate": 9.252401821005216e-06, + "loss": 0.5851, "step": 2833 }, { - "epoch": 0.3, - "grad_norm": 2.310647756260461, - "learning_rate": 8.229938770132843e-06, - "loss": 0.6142, + "epoch": 0.2, + "grad_norm": 4.192901032040957, + "learning_rate": 9.25179723906244e-06, + "loss": 0.6441, "step": 2834 }, { - "epoch": 0.3, - "grad_norm": 3.439737457921449, - "learning_rate": 8.228637656120268e-06, - "loss": 0.6859, + "epoch": 0.2, + "grad_norm": 1.5529191963129034, + "learning_rate": 9.251192432525328e-06, + "loss": 0.5416, "step": 2835 }, { - "epoch": 0.3, - "grad_norm": 3.465299362478447, - "learning_rate": 8.227336167013901e-06, - "loss": 0.619, + "epoch": 0.2, + "grad_norm": 1.5464017444899205, + "learning_rate": 9.250587401425828e-06, + "loss": 0.5693, "step": 2836 }, { - "epoch": 0.3, - "grad_norm": 5.161811987054101, - "learning_rate": 8.22603430296495e-06, - "loss": 0.6885, + "epoch": 0.2, + "grad_norm": 1.907646435883756, + "learning_rate": 9.249982145795897e-06, + "loss": 0.5664, "step": 2837 }, { - "epoch": 0.3, - "grad_norm": 13.016909893025721, - "learning_rate": 8.224732064124658e-06, - "loss": 0.7578, + "epoch": 0.2, + "grad_norm": 14.67019518429775, + "learning_rate": 9.249376665667509e-06, + "loss": 0.554, "step": 2838 }, { - "epoch": 0.3, - "grad_norm": 3.29770830365415, - "learning_rate": 8.223429450644317e-06, - "loss": 0.5983, + "epoch": 0.2, + "grad_norm": 1.6114690998487287, + "learning_rate": 9.248770961072647e-06, + "loss": 0.5143, "step": 2839 }, { - "epoch": 0.3, - "grad_norm": 2.74582961488932, - "learning_rate": 8.222126462675259e-06, - "loss": 0.703, + "epoch": 0.2, + "grad_norm": 1.6446154935481185, + "learning_rate": 9.248165032043304e-06, + "loss": 0.5739, "step": 2840 }, { - "epoch": 0.3, - "grad_norm": 2.839825341685874, - "learning_rate": 8.220823100368865e-06, - "loss": 0.7577, + "epoch": 0.2, + "grad_norm": 2.290768937714046, + "learning_rate": 9.247558878611489e-06, + "loss": 0.5079, "step": 2841 }, { - "epoch": 0.3, - "grad_norm": 2.151612849557347, - "learning_rate": 8.219519363876552e-06, - "loss": 0.7437, + "epoch": 0.2, + "grad_norm": 1.5423741829977375, + "learning_rate": 9.246952500809223e-06, + "loss": 0.6527, "step": 2842 }, { - "epoch": 0.3, - "grad_norm": 2.2813563053260157, - "learning_rate": 8.218215253349785e-06, - "loss": 0.7145, + "epoch": 0.2, + "grad_norm": 1.7169288362807484, + "learning_rate": 9.246345898668534e-06, + "loss": 0.514, "step": 2843 }, { - "epoch": 0.3, - "grad_norm": 2.8740576992118227, - "learning_rate": 8.216910768940075e-06, - "loss": 0.6663, + "epoch": 0.2, + "grad_norm": 1.7315466140140183, + "learning_rate": 9.245739072221467e-06, + "loss": 0.5914, "step": 2844 }, { - "epoch": 0.3, - "grad_norm": 2.3832922463971293, - "learning_rate": 8.215605910798972e-06, - "loss": 0.6398, + "epoch": 0.2, + "grad_norm": 1.997609481479007, + "learning_rate": 9.245132021500076e-06, + "loss": 0.6162, "step": 2845 }, { - "epoch": 0.3, - "grad_norm": 2.087398052989275, - "learning_rate": 8.21430067907807e-06, - "loss": 0.6233, + "epoch": 0.2, + "grad_norm": 1.8144813243882123, + "learning_rate": 9.244524746536427e-06, + "loss": 0.5997, "step": 2846 }, { - "epoch": 0.3, - "grad_norm": 3.1121220094877007, - "learning_rate": 8.212995073929002e-06, - "loss": 0.7069, + "epoch": 0.2, + "grad_norm": 1.6601213351828352, + "learning_rate": 9.243917247362597e-06, + "loss": 0.5412, "step": 2847 }, { - "epoch": 0.3, - "grad_norm": 1.1733089591500396, - "learning_rate": 8.211689095503457e-06, - "loss": 0.6373, + "epoch": 0.2, + "grad_norm": 1.8440281798990021, + "learning_rate": 9.24330952401068e-06, + "loss": 0.6, "step": 2848 }, { - "epoch": 0.3, - "grad_norm": 2.53436764490174, - "learning_rate": 8.210382743953159e-06, - "loss": 0.6675, + "epoch": 0.2, + "grad_norm": 1.7971340389663961, + "learning_rate": 9.242701576512774e-06, + "loss": 0.5972, "step": 2849 }, { - "epoch": 0.3, - "grad_norm": 2.9962280045699328, - "learning_rate": 8.20907601942987e-06, - "loss": 0.6339, + "epoch": 0.2, + "grad_norm": 2.138139461080658, + "learning_rate": 9.242093404900995e-06, + "loss": 0.5313, "step": 2850 }, { - "epoch": 0.3, - "grad_norm": 2.6687863772007625, - "learning_rate": 8.207768922085408e-06, - "loss": 0.6427, + "epoch": 0.2, + "grad_norm": 1.668173632279138, + "learning_rate": 9.241485009207469e-06, + "loss": 0.5332, "step": 2851 }, { - "epoch": 0.3, - "grad_norm": 2.4367057789328523, - "learning_rate": 8.206461452071625e-06, - "loss": 0.6382, + "epoch": 0.2, + "grad_norm": 1.6202216299479468, + "learning_rate": 9.24087638946433e-06, + "loss": 0.5339, "step": 2852 }, { - "epoch": 0.3, - "grad_norm": 2.494084327088906, - "learning_rate": 8.20515360954042e-06, - "loss": 0.7028, + "epoch": 0.2, + "grad_norm": 1.250698925091978, + "learning_rate": 9.240267545703733e-06, + "loss": 0.4896, "step": 2853 }, { - "epoch": 0.3, - "grad_norm": 3.0800899409941738, - "learning_rate": 8.203845394643732e-06, - "loss": 0.6296, + "epoch": 0.2, + "grad_norm": 1.9421845662123198, + "learning_rate": 9.239658477957837e-06, + "loss": 0.6124, "step": 2854 }, { - "epoch": 0.3, - "grad_norm": 10.158844023386168, - "learning_rate": 8.202536807533548e-06, - "loss": 0.7244, + "epoch": 0.2, + "grad_norm": 1.625590848636501, + "learning_rate": 9.239049186258811e-06, + "loss": 0.6287, "step": 2855 }, { - "epoch": 0.3, - "grad_norm": 2.667508619561635, - "learning_rate": 8.201227848361895e-06, - "loss": 0.7001, + "epoch": 0.2, + "grad_norm": 1.7032259866182387, + "learning_rate": 9.238439670638844e-06, + "loss": 0.6122, "step": 2856 }, { - "epoch": 0.3, - "grad_norm": 2.53043885098298, - "learning_rate": 8.199918517280848e-06, - "loss": 0.6844, + "epoch": 0.2, + "grad_norm": 1.6796453226253125, + "learning_rate": 9.237829931130132e-06, + "loss": 0.5678, "step": 2857 }, { - "epoch": 0.3, - "grad_norm": 3.114762138835378, - "learning_rate": 8.198608814442513e-06, - "loss": 0.6376, + "epoch": 0.2, + "grad_norm": 0.7696402479926759, + "learning_rate": 9.237219967764884e-06, + "loss": 0.4786, "step": 2858 }, { - "epoch": 0.3, - "grad_norm": 2.9348916221619814, - "learning_rate": 8.197298739999055e-06, - "loss": 0.6631, + "epoch": 0.2, + "grad_norm": 1.8654624172911678, + "learning_rate": 9.236609780575318e-06, + "loss": 0.577, "step": 2859 }, { - "epoch": 0.3, - "grad_norm": 2.2713830464167466, - "learning_rate": 8.19598829410267e-06, - "loss": 0.5696, + "epoch": 0.2, + "grad_norm": 3.012730219312984, + "learning_rate": 9.23599936959367e-06, + "loss": 0.6212, "step": 2860 }, { - "epoch": 0.3, - "grad_norm": 2.640428459107109, - "learning_rate": 8.194677476905604e-06, - "loss": 0.6494, + "epoch": 0.2, + "grad_norm": 1.6802048966716066, + "learning_rate": 9.23538873485218e-06, + "loss": 0.4906, "step": 2861 }, { - "epoch": 0.3, - "grad_norm": 2.458537500123125, - "learning_rate": 8.193366288560144e-06, - "loss": 0.7073, + "epoch": 0.2, + "grad_norm": 1.6741699659185325, + "learning_rate": 9.234777876383107e-06, + "loss": 0.6046, "step": 2862 }, { - "epoch": 0.3, - "grad_norm": 2.789431886976531, - "learning_rate": 8.192054729218621e-06, - "loss": 0.6223, + "epoch": 0.2, + "grad_norm": 1.7592852471663012, + "learning_rate": 9.234166794218718e-06, + "loss": 0.6263, "step": 2863 }, { - "epoch": 0.3, - "grad_norm": 2.0346142539870478, - "learning_rate": 8.190742799033404e-06, - "loss": 0.6502, + "epoch": 0.2, + "grad_norm": 1.737544124461475, + "learning_rate": 9.233555488391289e-06, + "loss": 0.5478, "step": 2864 }, { - "epoch": 0.3, - "grad_norm": 2.523541617507126, - "learning_rate": 8.189430498156914e-06, - "loss": 0.6346, + "epoch": 0.2, + "grad_norm": 1.7589584993983127, + "learning_rate": 9.232943958933117e-06, + "loss": 0.6087, "step": 2865 }, { - "epoch": 0.3, - "grad_norm": 2.2087194523547145, - "learning_rate": 8.18811782674161e-06, - "loss": 0.6108, + "epoch": 0.2, + "grad_norm": 1.5481233501097502, + "learning_rate": 9.232332205876498e-06, + "loss": 0.5838, "step": 2866 }, { - "epoch": 0.3, - "grad_norm": 2.491402481550859, - "learning_rate": 8.18680478493999e-06, - "loss": 0.6853, + "epoch": 0.2, + "grad_norm": 1.5694267599773661, + "learning_rate": 9.231720229253753e-06, + "loss": 0.516, "step": 2867 }, { - "epoch": 0.3, - "grad_norm": 3.026321341321937, - "learning_rate": 8.185491372904604e-06, - "loss": 0.6313, + "epoch": 0.2, + "grad_norm": 1.4905733704470852, + "learning_rate": 9.231108029097207e-06, + "loss": 0.5854, "step": 2868 }, { - "epoch": 0.3, - "grad_norm": 2.6138591078805526, - "learning_rate": 8.184177590788038e-06, - "loss": 0.6652, + "epoch": 0.2, + "grad_norm": 1.6344975619870183, + "learning_rate": 9.230495605439195e-06, + "loss": 0.5488, "step": 2869 }, { - "epoch": 0.3, - "grad_norm": 2.519595862218996, - "learning_rate": 8.182863438742922e-06, - "loss": 0.7254, + "epoch": 0.2, + "grad_norm": 1.6146690577385583, + "learning_rate": 9.229882958312074e-06, + "loss": 0.5881, "step": 2870 }, { - "epoch": 0.3, - "grad_norm": 2.4026200308540617, - "learning_rate": 8.181548916921935e-06, - "loss": 0.5704, + "epoch": 0.2, + "grad_norm": 2.538163768919168, + "learning_rate": 9.229270087748201e-06, + "loss": 0.5996, "step": 2871 }, { - "epoch": 0.3, - "grad_norm": 2.580908432962159, - "learning_rate": 8.180234025477792e-06, - "loss": 0.6507, + "epoch": 0.2, + "grad_norm": 2.44517415467902, + "learning_rate": 9.22865699377995e-06, + "loss": 0.6062, "step": 2872 }, { - "epoch": 0.3, - "grad_norm": 2.268967709119169, - "learning_rate": 8.178918764563251e-06, - "loss": 0.718, + "epoch": 0.2, + "grad_norm": 1.9494840114356795, + "learning_rate": 9.22804367643971e-06, + "loss": 0.5271, "step": 2873 }, { - "epoch": 0.3, - "grad_norm": 2.2396586103792795, - "learning_rate": 8.177603134331119e-06, - "loss": 0.59, + "epoch": 0.2, + "grad_norm": 0.9095871776060641, + "learning_rate": 9.227430135759875e-06, + "loss": 0.464, "step": 2874 }, { - "epoch": 0.3, - "grad_norm": 2.7093881022757165, - "learning_rate": 8.17628713493424e-06, - "loss": 0.6465, + "epoch": 0.2, + "grad_norm": 1.6421233792030172, + "learning_rate": 9.226816371772855e-06, + "loss": 0.539, "step": 2875 }, { - "epoch": 0.3, - "grad_norm": 2.2292433893864105, - "learning_rate": 8.174970766525503e-06, - "loss": 0.6516, + "epoch": 0.2, + "grad_norm": 1.7979969520696786, + "learning_rate": 9.226202384511074e-06, + "loss": 0.6137, "step": 2876 }, { - "epoch": 0.3, - "grad_norm": 3.0744331784154943, - "learning_rate": 8.17365402925784e-06, - "loss": 0.6089, + "epoch": 0.2, + "grad_norm": 2.586468132922063, + "learning_rate": 9.225588174006962e-06, + "loss": 0.6047, "step": 2877 }, { - "epoch": 0.3, - "grad_norm": 2.563571554597488, - "learning_rate": 8.172336923284225e-06, - "loss": 0.6704, + "epoch": 0.2, + "grad_norm": 2.2536909154667213, + "learning_rate": 9.224973740292964e-06, + "loss": 0.4742, "step": 2878 }, { - "epoch": 0.3, - "grad_norm": 2.8782161919357705, - "learning_rate": 8.17101944875768e-06, - "loss": 0.7115, + "epoch": 0.2, + "grad_norm": 1.7964701908226992, + "learning_rate": 9.224359083401538e-06, + "loss": 0.5629, "step": 2879 }, { - "epoch": 0.3, - "grad_norm": 2.662681630857156, - "learning_rate": 8.16970160583126e-06, - "loss": 0.6443, + "epoch": 0.2, + "grad_norm": 3.811896545565224, + "learning_rate": 9.223744203365148e-06, + "loss": 0.5241, "step": 2880 }, { - "epoch": 0.3, - "grad_norm": 2.35955388779492, - "learning_rate": 8.16838339465807e-06, - "loss": 0.7116, + "epoch": 0.2, + "grad_norm": 1.8604214087018058, + "learning_rate": 9.22312910021628e-06, + "loss": 0.5737, "step": 2881 }, { - "epoch": 0.3, - "grad_norm": 3.1676445753173974, - "learning_rate": 8.167064815391254e-06, - "loss": 0.6053, + "epoch": 0.2, + "grad_norm": 1.8047742776489735, + "learning_rate": 9.222513773987422e-06, + "loss": 0.6403, "step": 2882 }, { - "epoch": 0.3, - "grad_norm": 2.466081183525901, - "learning_rate": 8.165745868184006e-06, - "loss": 0.6041, + "epoch": 0.2, + "grad_norm": 2.0792131050815774, + "learning_rate": 9.221898224711078e-06, + "loss": 0.5815, "step": 2883 }, { - "epoch": 0.3, - "grad_norm": 2.0720974841936783, - "learning_rate": 8.164426553189553e-06, - "loss": 0.6562, + "epoch": 0.2, + "grad_norm": 1.8363959958204494, + "learning_rate": 9.221282452419767e-06, + "loss": 0.5625, "step": 2884 }, { - "epoch": 0.3, - "grad_norm": 3.740189082503985, - "learning_rate": 8.16310687056117e-06, - "loss": 0.7227, + "epoch": 0.2, + "grad_norm": 1.696690120125854, + "learning_rate": 9.220666457146011e-06, + "loss": 0.6006, "step": 2885 }, { - "epoch": 0.3, - "grad_norm": 2.978902500808749, - "learning_rate": 8.161786820452176e-06, - "loss": 0.6807, + "epoch": 0.2, + "grad_norm": 1.6124158671281108, + "learning_rate": 9.220050238922351e-06, + "loss": 0.5504, "step": 2886 }, { - "epoch": 0.3, - "grad_norm": 2.301320445626182, - "learning_rate": 8.160466403015928e-06, - "loss": 0.726, + "epoch": 0.2, + "grad_norm": 1.735127196658405, + "learning_rate": 9.219433797781338e-06, + "loss": 0.6253, "step": 2887 }, { - "epoch": 0.3, - "grad_norm": 2.267037663828458, - "learning_rate": 8.159145618405828e-06, - "loss": 0.6489, + "epoch": 0.2, + "grad_norm": 1.6455567825470072, + "learning_rate": 9.218817133755536e-06, + "loss": 0.6307, "step": 2888 }, { - "epoch": 0.3, - "grad_norm": 2.015189747260077, - "learning_rate": 8.157824466775324e-06, - "loss": 0.6653, + "epoch": 0.21, + "grad_norm": 0.8773132925293483, + "learning_rate": 9.218200246877516e-06, + "loss": 0.4761, "step": 2889 }, { - "epoch": 0.3, - "grad_norm": 2.521922180245439, - "learning_rate": 8.156502948277902e-06, - "loss": 0.6805, + "epoch": 0.21, + "grad_norm": 1.646725963449686, + "learning_rate": 9.217583137179864e-06, + "loss": 0.5587, "step": 2890 }, { - "epoch": 0.3, - "grad_norm": 2.12799521453685, - "learning_rate": 8.15518106306709e-06, - "loss": 0.6851, + "epoch": 0.21, + "grad_norm": 1.6807410412100026, + "learning_rate": 9.216965804695182e-06, + "loss": 0.5225, "step": 2891 }, { - "epoch": 0.3, - "grad_norm": 2.0483331306231944, - "learning_rate": 8.153858811296465e-06, - "loss": 0.6434, + "epoch": 0.21, + "grad_norm": 2.839811009167378, + "learning_rate": 9.216348249456077e-06, + "loss": 0.558, "step": 2892 }, { - "epoch": 0.3, - "grad_norm": 5.707423136219375, - "learning_rate": 8.152536193119638e-06, - "loss": 0.577, + "epoch": 0.21, + "grad_norm": 10.782692947323637, + "learning_rate": 9.21573047149517e-06, + "loss": 0.5606, "step": 2893 }, { - "epoch": 0.3, - "grad_norm": 3.092278245285496, - "learning_rate": 8.151213208690271e-06, - "loss": 0.6721, + "epoch": 0.21, + "grad_norm": 1.501291564678969, + "learning_rate": 9.215112470845096e-06, + "loss": 0.5906, "step": 2894 }, { - "epoch": 0.3, - "grad_norm": 2.561997031833164, - "learning_rate": 8.149889858162062e-06, - "loss": 0.6855, + "epoch": 0.21, + "grad_norm": 1.3754087025992447, + "learning_rate": 9.214494247538497e-06, + "loss": 0.5207, "step": 2895 }, { - "epoch": 0.3, - "grad_norm": 2.0803284506968662, - "learning_rate": 8.148566141688755e-06, - "loss": 0.6528, + "epoch": 0.21, + "grad_norm": 2.085231802716382, + "learning_rate": 9.213875801608032e-06, + "loss": 0.5792, "step": 2896 }, { - "epoch": 0.3, - "grad_norm": 2.9962095481419624, - "learning_rate": 8.147242059424134e-06, - "loss": 0.7353, + "epoch": 0.21, + "grad_norm": 1.7914285452031427, + "learning_rate": 9.213257133086368e-06, + "loss": 0.5254, "step": 2897 }, { - "epoch": 0.3, - "grad_norm": 2.5237602734129516, - "learning_rate": 8.145917611522029e-06, - "loss": 0.6057, + "epoch": 0.21, + "grad_norm": 2.334190065284943, + "learning_rate": 9.212638242006186e-06, + "loss": 0.647, "step": 2898 }, { - "epoch": 0.31, - "grad_norm": 1.8369538414675164, - "learning_rate": 8.14459279813631e-06, - "loss": 0.7189, + "epoch": 0.21, + "grad_norm": 0.9447230423904658, + "learning_rate": 9.212019128400177e-06, + "loss": 0.4836, "step": 2899 }, { - "epoch": 0.31, - "grad_norm": 1.2530174295755743, - "learning_rate": 8.143267619420892e-06, - "loss": 0.605, + "epoch": 0.21, + "grad_norm": 1.6731341983341819, + "learning_rate": 9.211399792301048e-06, + "loss": 0.6168, "step": 2900 }, { - "epoch": 0.31, - "grad_norm": 2.6147582967437226, - "learning_rate": 8.141942075529725e-06, - "loss": 0.7003, + "epoch": 0.21, + "grad_norm": 1.6201399854113188, + "learning_rate": 9.21078023374151e-06, + "loss": 0.5598, "step": 2901 }, { - "epoch": 0.31, - "grad_norm": 2.8824381178183907, - "learning_rate": 8.14061616661681e-06, - "loss": 0.7087, + "epoch": 0.21, + "grad_norm": 1.731615926063343, + "learning_rate": 9.210160452754292e-06, + "loss": 0.5306, "step": 2902 }, { - "epoch": 0.31, - "grad_norm": 3.993274788781763, - "learning_rate": 8.13928989283619e-06, - "loss": 0.5397, + "epoch": 0.21, + "grad_norm": 2.065902959931956, + "learning_rate": 9.209540449372132e-06, + "loss": 0.5845, "step": 2903 }, { - "epoch": 0.31, - "grad_norm": 2.5531708615561, - "learning_rate": 8.137963254341944e-06, - "loss": 0.6569, + "epoch": 0.21, + "grad_norm": 1.8482994456875714, + "learning_rate": 9.208920223627781e-06, + "loss": 0.5331, "step": 2904 }, { - "epoch": 0.31, - "grad_norm": 2.666082677328026, - "learning_rate": 8.136636251288197e-06, - "loss": 0.7032, + "epoch": 0.21, + "grad_norm": 1.652143875836721, + "learning_rate": 9.208299775554004e-06, + "loss": 0.5631, "step": 2905 }, { - "epoch": 0.31, - "grad_norm": 2.483321235755457, - "learning_rate": 8.135308883829119e-06, - "loss": 0.6559, + "epoch": 0.21, + "grad_norm": 1.5254768629588285, + "learning_rate": 9.207679105183573e-06, + "loss": 0.5309, "step": 2906 }, { - "epoch": 0.31, - "grad_norm": 2.2907791966791415, - "learning_rate": 8.133981152118916e-06, - "loss": 0.6794, + "epoch": 0.21, + "grad_norm": 1.7927125695773027, + "learning_rate": 9.207058212549273e-06, + "loss": 0.532, "step": 2907 }, { - "epoch": 0.31, - "grad_norm": 2.2809901014129657, - "learning_rate": 8.132653056311844e-06, - "loss": 0.6586, + "epoch": 0.21, + "grad_norm": 1.4798839477708898, + "learning_rate": 9.206437097683905e-06, + "loss": 0.5859, "step": 2908 }, { - "epoch": 0.31, - "grad_norm": 3.5398481577715, - "learning_rate": 8.131324596562195e-06, - "loss": 0.6637, + "epoch": 0.21, + "grad_norm": 3.567369089956538, + "learning_rate": 9.205815760620274e-06, + "loss": 0.5836, "step": 2909 }, { - "epoch": 0.31, - "grad_norm": 2.7594732513859443, - "learning_rate": 8.129995773024306e-06, - "loss": 0.7316, + "epoch": 0.21, + "grad_norm": 1.9159242475402147, + "learning_rate": 9.205194201391204e-06, + "loss": 0.6332, "step": 2910 }, { - "epoch": 0.31, - "grad_norm": 3.30105743088516, - "learning_rate": 8.128666585852556e-06, - "loss": 0.6668, + "epoch": 0.21, + "grad_norm": 1.5423109971678717, + "learning_rate": 9.204572420029527e-06, + "loss": 0.5142, "step": 2911 }, { - "epoch": 0.31, - "grad_norm": 2.731674755400316, - "learning_rate": 8.127337035201365e-06, - "loss": 0.6782, + "epoch": 0.21, + "grad_norm": 3.308868578908772, + "learning_rate": 9.203950416568088e-06, + "loss": 0.5626, "step": 2912 }, { - "epoch": 0.31, - "grad_norm": 3.186947648727673, - "learning_rate": 8.1260071212252e-06, - "loss": 0.6693, + "epoch": 0.21, + "grad_norm": 1.676845206748622, + "learning_rate": 9.203328191039742e-06, + "loss": 0.5926, "step": 2913 }, { - "epoch": 0.31, - "grad_norm": 2.198681447100411, - "learning_rate": 8.12467684407856e-06, - "loss": 0.6396, + "epoch": 0.21, + "grad_norm": 0.8533260914837673, + "learning_rate": 9.20270574347736e-06, + "loss": 0.4589, "step": 2914 }, { - "epoch": 0.31, - "grad_norm": 2.5485456110616918, - "learning_rate": 8.123346203916e-06, - "loss": 0.6218, + "epoch": 0.21, + "grad_norm": 1.7810328913358453, + "learning_rate": 9.20208307391382e-06, + "loss": 0.5349, "step": 2915 }, { - "epoch": 0.31, - "grad_norm": 2.914548621339626, - "learning_rate": 8.122015200892106e-06, - "loss": 0.6717, + "epoch": 0.21, + "grad_norm": 1.5765708446740923, + "learning_rate": 9.201460182382012e-06, + "loss": 0.6231, "step": 2916 }, { - "epoch": 0.31, - "grad_norm": 2.593755867982301, - "learning_rate": 8.120683835161511e-06, - "loss": 0.6373, + "epoch": 0.21, + "grad_norm": 1.4799066828948022, + "learning_rate": 9.20083706891484e-06, + "loss": 0.6258, "step": 2917 }, { - "epoch": 0.31, - "grad_norm": 2.062797790828946, - "learning_rate": 8.11935210687889e-06, - "loss": 0.6702, + "epoch": 0.21, + "grad_norm": 1.5957737138162542, + "learning_rate": 9.200213733545221e-06, + "loss": 0.5865, "step": 2918 }, { - "epoch": 0.31, - "grad_norm": 2.9875147078800683, - "learning_rate": 8.118020016198957e-06, - "loss": 0.6398, + "epoch": 0.21, + "grad_norm": 2.0543416602382885, + "learning_rate": 9.19959017630608e-06, + "loss": 0.6032, "step": 2919 }, { - "epoch": 0.31, - "grad_norm": 2.022004557471706, - "learning_rate": 8.11668756327647e-06, - "loss": 0.7186, + "epoch": 0.21, + "grad_norm": 1.846278467960694, + "learning_rate": 9.198966397230356e-06, + "loss": 0.6174, "step": 2920 }, { - "epoch": 0.31, - "grad_norm": 1.2197328167155737, - "learning_rate": 8.115354748266233e-06, - "loss": 0.652, + "epoch": 0.21, + "grad_norm": 1.6882515879058753, + "learning_rate": 9.198342396350998e-06, + "loss": 0.5394, "step": 2921 }, { - "epoch": 0.31, - "grad_norm": 2.0617705515592957, - "learning_rate": 8.114021571323089e-06, - "loss": 0.5757, + "epoch": 0.21, + "grad_norm": 1.977109529774458, + "learning_rate": 9.19771817370097e-06, + "loss": 0.5054, "step": 2922 }, { - "epoch": 0.31, - "grad_norm": 2.3145783721939996, - "learning_rate": 8.112688032601919e-06, - "loss": 0.6625, + "epoch": 0.21, + "grad_norm": 1.709262258868334, + "learning_rate": 9.197093729313243e-06, + "loss": 0.5326, "step": 2923 }, { - "epoch": 0.31, - "grad_norm": 2.531303219988851, - "learning_rate": 8.111354132257651e-06, - "loss": 0.6679, + "epoch": 0.21, + "grad_norm": 1.7441989459637544, + "learning_rate": 9.196469063220804e-06, + "loss": 0.5914, "step": 2924 }, { - "epoch": 0.31, - "grad_norm": 2.6045952874222555, - "learning_rate": 8.110019870445254e-06, - "loss": 0.7008, + "epoch": 0.21, + "grad_norm": 2.0429338727372692, + "learning_rate": 9.195844175456649e-06, + "loss": 0.578, "step": 2925 }, { - "epoch": 0.31, - "grad_norm": 2.9426229731805704, - "learning_rate": 8.10868524731974e-06, - "loss": 0.6898, + "epoch": 0.21, + "grad_norm": 1.8170650906672365, + "learning_rate": 9.195219066053787e-06, + "loss": 0.6785, "step": 2926 }, { - "epoch": 0.31, - "grad_norm": 2.261653088656059, - "learning_rate": 8.107350263036157e-06, - "loss": 0.6312, + "epoch": 0.21, + "grad_norm": 1.5271149899967154, + "learning_rate": 9.194593735045242e-06, + "loss": 0.5416, "step": 2927 }, { - "epoch": 0.31, - "grad_norm": 4.259944992592717, - "learning_rate": 8.106014917749605e-06, - "loss": 0.6676, + "epoch": 0.21, + "grad_norm": 1.6289956453148233, + "learning_rate": 9.193968182464039e-06, + "loss": 0.5121, "step": 2928 }, { - "epoch": 0.31, - "grad_norm": 2.6421624462915765, - "learning_rate": 8.104679211615218e-06, - "loss": 0.7101, + "epoch": 0.21, + "grad_norm": 1.8133352683877872, + "learning_rate": 9.193342408343227e-06, + "loss": 0.5974, "step": 2929 }, { - "epoch": 0.31, - "grad_norm": 2.3871886404739504, - "learning_rate": 8.103343144788177e-06, - "loss": 0.6416, + "epoch": 0.21, + "grad_norm": 2.1691409271057283, + "learning_rate": 9.192716412715858e-06, + "loss": 0.5329, "step": 2930 }, { - "epoch": 0.31, - "grad_norm": 2.1379036146977164, - "learning_rate": 8.102006717423695e-06, - "loss": 0.6405, + "epoch": 0.21, + "grad_norm": 2.3941092741171044, + "learning_rate": 9.192090195615004e-06, + "loss": 0.6266, "step": 2931 }, { - "epoch": 0.31, - "grad_norm": 3.0829424463195303, - "learning_rate": 8.100669929677044e-06, - "loss": 0.6991, + "epoch": 0.21, + "grad_norm": 1.8610670101484466, + "learning_rate": 9.191463757073739e-06, + "loss": 0.5836, "step": 2932 }, { - "epoch": 0.31, - "grad_norm": 2.45544537675686, - "learning_rate": 8.099332781703523e-06, - "loss": 0.6083, + "epoch": 0.21, + "grad_norm": 2.617513762639927, + "learning_rate": 9.190837097125156e-06, + "loss": 0.5215, "step": 2933 }, { - "epoch": 0.31, - "grad_norm": 2.5320119199003224, - "learning_rate": 8.097995273658479e-06, - "loss": 0.6114, + "epoch": 0.21, + "grad_norm": 1.7045783531611103, + "learning_rate": 9.190210215802358e-06, + "loss": 0.6331, "step": 2934 }, { - "epoch": 0.31, - "grad_norm": 2.2301392753727916, - "learning_rate": 8.0966574056973e-06, - "loss": 0.7644, + "epoch": 0.21, + "grad_norm": 1.689146392103963, + "learning_rate": 9.189583113138458e-06, + "loss": 0.6042, "step": 2935 }, { - "epoch": 0.31, - "grad_norm": 2.397360362520422, - "learning_rate": 8.095319177975412e-06, - "loss": 0.5961, + "epoch": 0.21, + "grad_norm": 1.795051603133157, + "learning_rate": 9.188955789166582e-06, + "loss": 0.5732, "step": 2936 }, { - "epoch": 0.31, - "grad_norm": 3.5622955344160414, - "learning_rate": 8.093980590648291e-06, - "loss": 0.6874, + "epoch": 0.21, + "grad_norm": 1.8505155464140037, + "learning_rate": 9.188328243919866e-06, + "loss": 0.5008, "step": 2937 }, { - "epoch": 0.31, - "grad_norm": 2.481733875250971, - "learning_rate": 8.092641643871451e-06, - "loss": 0.5639, + "epoch": 0.21, + "grad_norm": 1.6959415603088208, + "learning_rate": 9.187700477431461e-06, + "loss": 0.6485, "step": 2938 }, { - "epoch": 0.31, - "grad_norm": 2.237192875379186, - "learning_rate": 8.091302337800441e-06, - "loss": 0.6794, + "epoch": 0.21, + "grad_norm": 1.5860746687300702, + "learning_rate": 9.187072489734525e-06, + "loss": 0.5644, "step": 2939 }, { - "epoch": 0.31, - "grad_norm": 2.364977168624842, - "learning_rate": 8.089962672590865e-06, - "loss": 0.6851, + "epoch": 0.21, + "grad_norm": 1.656284732460159, + "learning_rate": 9.186444280862235e-06, + "loss": 0.5986, "step": 2940 }, { - "epoch": 0.31, - "grad_norm": 2.155447396944477, - "learning_rate": 8.088622648398357e-06, - "loss": 0.6662, + "epoch": 0.21, + "grad_norm": 1.8266162060923723, + "learning_rate": 9.18581585084777e-06, + "loss": 0.5277, "step": 2941 }, { - "epoch": 0.31, - "grad_norm": 2.8646626316625636, - "learning_rate": 8.087282265378596e-06, - "loss": 0.6547, + "epoch": 0.21, + "grad_norm": 1.486235889249101, + "learning_rate": 9.18518719972433e-06, + "loss": 0.5234, "step": 2942 }, { - "epoch": 0.31, - "grad_norm": 2.999962898982827, - "learning_rate": 8.085941523687309e-06, - "loss": 0.6913, + "epoch": 0.21, + "grad_norm": 1.3650488659449154, + "learning_rate": 9.184558327525122e-06, + "loss": 0.5241, "step": 2943 }, { - "epoch": 0.31, - "grad_norm": 2.793000783068167, - "learning_rate": 8.084600423480253e-06, - "loss": 0.7647, + "epoch": 0.21, + "grad_norm": 1.4495895950530286, + "learning_rate": 9.183929234283362e-06, + "loss": 0.5361, "step": 2944 }, { - "epoch": 0.31, - "grad_norm": 1.294318321894251, - "learning_rate": 8.083258964913238e-06, - "loss": 0.6202, + "epoch": 0.21, + "grad_norm": 1.5401359309107288, + "learning_rate": 9.183299920032282e-06, + "loss": 0.4995, "step": 2945 }, { - "epoch": 0.31, - "grad_norm": 2.3681448665076648, - "learning_rate": 8.08191714814211e-06, - "loss": 0.6897, + "epoch": 0.21, + "grad_norm": 1.8300843294979359, + "learning_rate": 9.182670384805127e-06, + "loss": 0.5308, "step": 2946 }, { - "epoch": 0.31, - "grad_norm": 3.4035241766844258, - "learning_rate": 8.080574973322755e-06, - "loss": 0.6347, + "epoch": 0.21, + "grad_norm": 3.184766244684131, + "learning_rate": 9.18204062863515e-06, + "loss": 0.5294, "step": 2947 }, { - "epoch": 0.31, - "grad_norm": 10.697359357745482, - "learning_rate": 8.079232440611106e-06, - "loss": 0.6607, + "epoch": 0.21, + "grad_norm": 2.501208382325759, + "learning_rate": 9.181410651555613e-06, + "loss": 0.5696, "step": 2948 }, { - "epoch": 0.31, - "grad_norm": 11.08038730758078, - "learning_rate": 8.077889550163133e-06, - "loss": 0.6913, + "epoch": 0.21, + "grad_norm": 2.7010771838225285, + "learning_rate": 9.1807804535998e-06, + "loss": 0.5228, "step": 2949 }, { - "epoch": 0.31, - "grad_norm": 2.289498594594031, - "learning_rate": 8.076546302134849e-06, - "loss": 0.5815, + "epoch": 0.21, + "grad_norm": 1.723230036944767, + "learning_rate": 9.180150034800996e-06, + "loss": 0.53, "step": 2950 }, { - "epoch": 0.31, - "grad_norm": 3.8411656849036024, - "learning_rate": 8.07520269668231e-06, - "loss": 0.6598, + "epoch": 0.21, + "grad_norm": 1.5605804266265146, + "learning_rate": 9.179519395192503e-06, + "loss": 0.4408, "step": 2951 }, { - "epoch": 0.31, - "grad_norm": 3.9514846702421127, - "learning_rate": 8.073858733961609e-06, - "loss": 0.7163, + "epoch": 0.21, + "grad_norm": 2.1713932337073776, + "learning_rate": 9.178888534807633e-06, + "loss": 0.6159, "step": 2952 }, { - "epoch": 0.31, - "grad_norm": 3.287925053791738, - "learning_rate": 8.072514414128886e-06, - "loss": 0.6914, + "epoch": 0.21, + "grad_norm": 0.8311380107728986, + "learning_rate": 9.17825745367971e-06, + "loss": 0.4805, "step": 2953 }, { - "epoch": 0.31, - "grad_norm": 2.850131167037012, - "learning_rate": 8.071169737340322e-06, - "loss": 0.6972, + "epoch": 0.21, + "grad_norm": 0.7918724966847771, + "learning_rate": 9.177626151842072e-06, + "loss": 0.4876, "step": 2954 }, { - "epoch": 0.31, - "grad_norm": 3.3564428711936714, - "learning_rate": 8.069824703752136e-06, - "loss": 0.6299, + "epoch": 0.21, + "grad_norm": 0.9025730819301531, + "learning_rate": 9.176994629328064e-06, + "loss": 0.4764, "step": 2955 }, { - "epoch": 0.31, - "grad_norm": 2.5337256748284793, - "learning_rate": 8.068479313520589e-06, - "loss": 0.7192, + "epoch": 0.21, + "grad_norm": 4.221840446075136, + "learning_rate": 9.176362886171047e-06, + "loss": 0.5358, "step": 2956 }, { - "epoch": 0.31, - "grad_norm": 2.7829671234993767, - "learning_rate": 8.067133566801986e-06, - "loss": 0.6942, + "epoch": 0.21, + "grad_norm": 1.4546109512245118, + "learning_rate": 9.17573092240439e-06, + "loss": 0.5404, "step": 2957 }, { - "epoch": 0.31, - "grad_norm": 2.0780064728452814, - "learning_rate": 8.06578746375267e-06, - "loss": 0.699, + "epoch": 0.21, + "grad_norm": 1.5904457778917604, + "learning_rate": 9.175098738061477e-06, + "loss": 0.5931, "step": 2958 }, { - "epoch": 0.31, - "grad_norm": 2.066654395430205, - "learning_rate": 8.06444100452903e-06, - "loss": 0.6052, + "epoch": 0.21, + "grad_norm": 1.8026307151274432, + "learning_rate": 9.1744663331757e-06, + "loss": 0.5396, "step": 2959 }, { - "epoch": 0.31, - "grad_norm": 2.3241867147767636, - "learning_rate": 8.063094189287492e-06, - "loss": 0.6425, + "epoch": 0.21, + "grad_norm": 1.773431336814164, + "learning_rate": 9.173833707780469e-06, + "loss": 0.5343, "step": 2960 }, { - "epoch": 0.31, - "grad_norm": 2.936750856792946, - "learning_rate": 8.061747018184525e-06, - "loss": 0.6816, + "epoch": 0.21, + "grad_norm": 1.7349186652520812, + "learning_rate": 9.173200861909196e-06, + "loss": 0.5274, "step": 2961 }, { - "epoch": 0.31, - "grad_norm": 2.564282733037237, - "learning_rate": 8.06039949137664e-06, - "loss": 0.7117, + "epoch": 0.21, + "grad_norm": 1.6585229766432203, + "learning_rate": 9.172567795595314e-06, + "loss": 0.5584, "step": 2962 }, { - "epoch": 0.31, - "grad_norm": 35.37441915823146, - "learning_rate": 8.05905160902039e-06, - "loss": 0.7604, + "epoch": 0.21, + "grad_norm": 1.7161871147778, + "learning_rate": 9.171934508872262e-06, + "loss": 0.5924, "step": 2963 }, { - "epoch": 0.31, - "grad_norm": 3.470994439733572, - "learning_rate": 8.057703371272368e-06, - "loss": 0.6648, + "epoch": 0.21, + "grad_norm": 1.856979841271599, + "learning_rate": 9.171301001773493e-06, + "loss": 0.6003, "step": 2964 }, { - "epoch": 0.31, - "grad_norm": 2.653406363973704, - "learning_rate": 8.056354778289204e-06, - "loss": 0.6818, + "epoch": 0.21, + "grad_norm": 1.6700972988581784, + "learning_rate": 9.17066727433247e-06, + "loss": 0.5651, "step": 2965 }, { - "epoch": 0.31, - "grad_norm": 2.7162242932530374, - "learning_rate": 8.055005830227578e-06, - "loss": 0.6176, + "epoch": 0.21, + "grad_norm": 1.6977608473129604, + "learning_rate": 9.17003332658267e-06, + "loss": 0.6016, "step": 2966 }, { - "epoch": 0.31, - "grad_norm": 2.0336246489108563, - "learning_rate": 8.053656527244206e-06, - "loss": 0.6957, + "epoch": 0.21, + "grad_norm": 1.5624041318138744, + "learning_rate": 9.169399158557581e-06, + "loss": 0.579, "step": 2967 }, { - "epoch": 0.31, - "grad_norm": 2.404566189632472, - "learning_rate": 8.052306869495847e-06, - "loss": 0.6862, + "epoch": 0.21, + "grad_norm": 2.8438849536673403, + "learning_rate": 9.1687647702907e-06, + "loss": 0.5662, "step": 2968 }, { - "epoch": 0.31, - "grad_norm": 2.662561717072185, - "learning_rate": 8.050956857139298e-06, - "loss": 0.7162, + "epoch": 0.21, + "grad_norm": 2.7454043905149943, + "learning_rate": 9.168130161815538e-06, + "loss": 0.5761, "step": 2969 }, { - "epoch": 0.31, - "grad_norm": 2.5875784364886254, - "learning_rate": 8.049606490331403e-06, - "loss": 0.6401, + "epoch": 0.21, + "grad_norm": 4.991668780322095, + "learning_rate": 9.167495333165615e-06, + "loss": 0.5257, "step": 2970 }, { - "epoch": 0.31, - "grad_norm": 2.978839811791059, - "learning_rate": 8.048255769229038e-06, - "loss": 0.6423, + "epoch": 0.21, + "grad_norm": 1.6285655025080312, + "learning_rate": 9.16686028437447e-06, + "loss": 0.5972, "step": 2971 }, { - "epoch": 0.31, - "grad_norm": 3.159696497187548, - "learning_rate": 8.046904693989132e-06, - "loss": 0.6541, + "epoch": 0.21, + "grad_norm": 1.7501827111969754, + "learning_rate": 9.166225015475645e-06, + "loss": 0.5755, "step": 2972 }, { - "epoch": 0.31, - "grad_norm": 3.108106082999746, - "learning_rate": 8.045553264768645e-06, - "loss": 0.7498, + "epoch": 0.21, + "grad_norm": 1.6985119952914056, + "learning_rate": 9.165589526502696e-06, + "loss": 0.5695, "step": 2973 }, { - "epoch": 0.31, - "grad_norm": 2.4365012913470747, - "learning_rate": 8.044201481724582e-06, - "loss": 0.6099, + "epoch": 0.21, + "grad_norm": 1.7558358586557516, + "learning_rate": 9.164953817489195e-06, + "loss": 0.6293, "step": 2974 }, { - "epoch": 0.31, - "grad_norm": 2.9007518834879593, - "learning_rate": 8.042849345013995e-06, - "loss": 0.6726, + "epoch": 0.21, + "grad_norm": 0.9802599679421644, + "learning_rate": 9.16431788846872e-06, + "loss": 0.48, "step": 2975 }, { - "epoch": 0.31, - "grad_norm": 2.2970067282582476, - "learning_rate": 8.041496854793964e-06, - "loss": 0.5851, + "epoch": 0.21, + "grad_norm": 1.604141576923345, + "learning_rate": 9.163681739474864e-06, + "loss": 0.5808, "step": 2976 }, { - "epoch": 0.31, - "grad_norm": 3.006328818924341, - "learning_rate": 8.040144011221621e-06, - "loss": 0.6791, + "epoch": 0.21, + "grad_norm": 1.605872711387335, + "learning_rate": 9.163045370541229e-06, + "loss": 0.5692, "step": 2977 }, { - "epoch": 0.31, - "grad_norm": 2.6939058418287805, - "learning_rate": 8.038790814454137e-06, - "loss": 0.726, + "epoch": 0.21, + "grad_norm": 1.529910082676683, + "learning_rate": 9.162408781701432e-06, + "loss": 0.5131, "step": 2978 }, { - "epoch": 0.31, - "grad_norm": 2.2054039052694705, - "learning_rate": 8.037437264648717e-06, - "loss": 0.6719, + "epoch": 0.21, + "grad_norm": 1.6729249340760217, + "learning_rate": 9.161771972989098e-06, + "loss": 0.623, "step": 2979 }, { - "epoch": 0.31, - "grad_norm": 2.41222955568751, - "learning_rate": 8.036083361962616e-06, - "loss": 0.6879, + "epoch": 0.21, + "grad_norm": 1.5859562145462096, + "learning_rate": 9.161134944437868e-06, + "loss": 0.559, "step": 2980 }, { - "epoch": 0.31, - "grad_norm": 2.4774726350606158, - "learning_rate": 8.03472910655313e-06, - "loss": 0.7345, + "epoch": 0.21, + "grad_norm": 1.6722943051443542, + "learning_rate": 9.16049769608139e-06, + "loss": 0.5306, "step": 2981 }, { - "epoch": 0.31, - "grad_norm": 2.3042323688271327, - "learning_rate": 8.033374498577586e-06, - "loss": 0.7063, + "epoch": 0.21, + "grad_norm": 1.7344162292165934, + "learning_rate": 9.159860227953325e-06, + "loss": 0.5083, "step": 2982 }, { - "epoch": 0.31, - "grad_norm": 2.7730077023368938, - "learning_rate": 8.032019538193363e-06, - "loss": 0.7008, + "epoch": 0.21, + "grad_norm": 1.487984191718565, + "learning_rate": 9.159222540087347e-06, + "loss": 0.4905, "step": 2983 }, { - "epoch": 0.31, - "grad_norm": 2.7189215222404144, - "learning_rate": 8.030664225557873e-06, - "loss": 0.6169, + "epoch": 0.21, + "grad_norm": 1.545422344028103, + "learning_rate": 9.158584632517142e-06, + "loss": 0.5813, "step": 2984 }, { - "epoch": 0.31, - "grad_norm": 2.845427019172591, - "learning_rate": 8.029308560828574e-06, - "loss": 0.7042, + "epoch": 0.21, + "grad_norm": 1.6175615746384093, + "learning_rate": 9.157946505276405e-06, + "loss": 0.6, "step": 2985 }, { - "epoch": 0.31, - "grad_norm": 2.311773529022882, - "learning_rate": 8.027952544162965e-06, - "loss": 0.7276, + "epoch": 0.21, + "grad_norm": 1.6946303444978852, + "learning_rate": 9.157308158398846e-06, + "loss": 0.579, "step": 2986 }, { - "epoch": 0.31, - "grad_norm": 4.498629343600977, - "learning_rate": 8.026596175718582e-06, - "loss": 0.65, + "epoch": 0.21, + "grad_norm": 2.446521766973696, + "learning_rate": 9.15666959191818e-06, + "loss": 0.4778, "step": 2987 }, { - "epoch": 0.31, - "grad_norm": 2.527545636273197, - "learning_rate": 8.025239455653003e-06, - "loss": 0.6596, + "epoch": 0.21, + "grad_norm": 3.0929677688933928, + "learning_rate": 9.156030805868144e-06, + "loss": 0.5906, "step": 2988 }, { - "epoch": 0.31, - "grad_norm": 2.3431814650204354, - "learning_rate": 8.023882384123851e-06, - "loss": 0.5784, + "epoch": 0.21, + "grad_norm": 1.7452174914756822, + "learning_rate": 9.155391800282477e-06, + "loss": 0.5239, "step": 2989 }, { - "epoch": 0.31, - "grad_norm": 2.266244506572552, - "learning_rate": 8.022524961288783e-06, - "loss": 0.6536, + "epoch": 0.21, + "grad_norm": 2.0798375651391963, + "learning_rate": 9.154752575194936e-06, + "loss": 0.6283, "step": 2990 }, { - "epoch": 0.31, - "grad_norm": 2.4413399629821395, - "learning_rate": 8.021167187305504e-06, - "loss": 0.648, + "epoch": 0.21, + "grad_norm": 0.8712352574894433, + "learning_rate": 9.154113130639286e-06, + "loss": 0.451, "step": 2991 }, { - "epoch": 0.31, - "grad_norm": 3.8044222712960485, - "learning_rate": 8.019809062331754e-06, - "loss": 0.6785, + "epoch": 0.21, + "grad_norm": 2.0929544050817475, + "learning_rate": 9.153473466649303e-06, + "loss": 0.5189, "step": 2992 }, { - "epoch": 0.31, - "grad_norm": 2.8009795918380527, - "learning_rate": 8.018450586525314e-06, - "loss": 0.6825, + "epoch": 0.21, + "grad_norm": 1.5847481238704257, + "learning_rate": 9.152833583258778e-06, + "loss": 0.5178, "step": 2993 }, { - "epoch": 0.32, - "grad_norm": 2.044733004957816, - "learning_rate": 8.017091760044014e-06, - "loss": 0.7218, + "epoch": 0.21, + "grad_norm": 1.603887266491553, + "learning_rate": 9.152193480501512e-06, + "loss": 0.4602, "step": 2994 }, { - "epoch": 0.32, - "grad_norm": 2.5348478649648625, - "learning_rate": 8.015732583045713e-06, - "loss": 0.6306, + "epoch": 0.21, + "grad_norm": 2.014600751045698, + "learning_rate": 9.151553158411318e-06, + "loss": 0.6141, "step": 2995 }, { - "epoch": 0.32, - "grad_norm": 2.820292578056801, - "learning_rate": 8.014373055688319e-06, - "loss": 0.6103, + "epoch": 0.21, + "grad_norm": 1.5654636288362136, + "learning_rate": 9.150912617022015e-06, + "loss": 0.5854, "step": 2996 }, { - "epoch": 0.32, - "grad_norm": 2.83941151917699, - "learning_rate": 8.013013178129775e-06, - "loss": 0.739, + "epoch": 0.21, + "grad_norm": 1.5731182236432362, + "learning_rate": 9.150271856367445e-06, + "loss": 0.5288, "step": 2997 }, { - "epoch": 0.32, - "grad_norm": 2.562461642222233, - "learning_rate": 8.01165295052807e-06, - "loss": 0.6528, + "epoch": 0.21, + "grad_norm": 1.9013412660735625, + "learning_rate": 9.149630876481452e-06, + "loss": 0.5863, "step": 2998 }, { - "epoch": 0.32, - "grad_norm": 2.2991584011557946, - "learning_rate": 8.010292373041233e-06, - "loss": 0.6633, + "epoch": 0.21, + "grad_norm": 1.6864579669043867, + "learning_rate": 9.148989677397894e-06, + "loss": 0.6295, "step": 2999 }, { - "epoch": 0.32, - "grad_norm": 3.0164089815235755, - "learning_rate": 8.008931445827329e-06, - "loss": 0.6508, + "epoch": 0.21, + "grad_norm": 1.528937163454691, + "learning_rate": 9.148348259150645e-06, + "loss": 0.5504, "step": 3000 }, { - "epoch": 0.32, - "grad_norm": 2.2488209086571036, - "learning_rate": 8.007570169044467e-06, - "loss": 0.6924, + "epoch": 0.21, + "grad_norm": 1.7665446986714837, + "learning_rate": 9.147706621773583e-06, + "loss": 0.568, "step": 3001 }, { - "epoch": 0.32, - "grad_norm": 4.067344347818427, - "learning_rate": 8.006208542850797e-06, - "loss": 0.716, + "epoch": 0.21, + "grad_norm": 1.6248087598036456, + "learning_rate": 9.147064765300604e-06, + "loss": 0.4851, "step": 3002 }, { - "epoch": 0.32, - "grad_norm": 2.433534837689038, - "learning_rate": 8.004846567404509e-06, - "loss": 0.6682, + "epoch": 0.21, + "grad_norm": 1.8089490497103624, + "learning_rate": 9.14642268976561e-06, + "loss": 0.448, "step": 3003 }, { - "epoch": 0.32, - "grad_norm": 3.1675254720528176, - "learning_rate": 8.003484242863833e-06, - "loss": 0.744, + "epoch": 0.21, + "grad_norm": 1.6990973885116376, + "learning_rate": 9.145780395202522e-06, + "loss": 0.5705, "step": 3004 }, { - "epoch": 0.32, - "grad_norm": 2.3134441373453916, - "learning_rate": 8.00212156938704e-06, - "loss": 0.5428, + "epoch": 0.21, + "grad_norm": 1.5957960742209898, + "learning_rate": 9.145137881645262e-06, + "loss": 0.5573, "step": 3005 }, { - "epoch": 0.32, - "grad_norm": 2.8236083439191355, - "learning_rate": 8.000758547132441e-06, - "loss": 0.66, + "epoch": 0.21, + "grad_norm": 1.5207597940791149, + "learning_rate": 9.144495149127777e-06, + "loss": 0.5704, "step": 3006 }, { - "epoch": 0.32, - "grad_norm": 2.158397208676714, - "learning_rate": 7.99939517625839e-06, - "loss": 0.7367, + "epoch": 0.21, + "grad_norm": 1.906382297447813, + "learning_rate": 9.143852197684013e-06, + "loss": 0.5566, "step": 3007 }, { - "epoch": 0.32, - "grad_norm": 2.2654650330105257, - "learning_rate": 7.998031456923274e-06, - "loss": 0.5894, + "epoch": 0.21, + "grad_norm": 1.006046244726447, + "learning_rate": 9.143209027347936e-06, + "loss": 0.4858, "step": 3008 }, { - "epoch": 0.32, - "grad_norm": 2.5662295257161833, - "learning_rate": 7.996667389285532e-06, - "loss": 0.6596, + "epoch": 0.21, + "grad_norm": 0.9325764639786617, + "learning_rate": 9.142565638153518e-06, + "loss": 0.4498, "step": 3009 }, { - "epoch": 0.32, - "grad_norm": 2.677019268255404, - "learning_rate": 7.995302973503636e-06, - "loss": 0.6978, + "epoch": 0.21, + "grad_norm": 4.975213812059827, + "learning_rate": 9.141922030134747e-06, + "loss": 0.5527, "step": 3010 }, { - "epoch": 0.32, - "grad_norm": 2.371436079489015, - "learning_rate": 7.993938209736097e-06, - "loss": 0.69, + "epoch": 0.21, + "grad_norm": 2.5157513079606777, + "learning_rate": 9.141278203325619e-06, + "loss": 0.5798, "step": 3011 }, { - "epoch": 0.32, - "grad_norm": 2.1346580337024186, - "learning_rate": 7.992573098141472e-06, - "loss": 0.644, + "epoch": 0.21, + "grad_norm": 1.826891708217817, + "learning_rate": 9.140634157760142e-06, + "loss": 0.5975, "step": 3012 }, { - "epoch": 0.32, - "grad_norm": 1.9295251025511633, - "learning_rate": 7.991207638878356e-06, - "loss": 0.6617, + "epoch": 0.21, + "grad_norm": 2.0737973727810335, + "learning_rate": 9.13998989347234e-06, + "loss": 0.5423, "step": 3013 }, { - "epoch": 0.32, - "grad_norm": 2.558479337294168, - "learning_rate": 7.989841832105382e-06, - "loss": 0.7159, + "epoch": 0.21, + "grad_norm": 1.8106414184389292, + "learning_rate": 9.139345410496244e-06, + "loss": 0.5813, "step": 3014 }, { - "epoch": 0.32, - "grad_norm": 2.147625038233415, - "learning_rate": 7.988475677981229e-06, - "loss": 0.6179, + "epoch": 0.21, + "grad_norm": 1.7278106931746051, + "learning_rate": 9.138700708865898e-06, + "loss": 0.5569, "step": 3015 }, { - "epoch": 0.32, - "grad_norm": 2.1693505791045053, - "learning_rate": 7.98710917666461e-06, - "loss": 0.6942, + "epoch": 0.21, + "grad_norm": 2.3353286441243455, + "learning_rate": 9.138055788615355e-06, + "loss": 0.5294, "step": 3016 }, { - "epoch": 0.32, - "grad_norm": 3.1568770622983475, - "learning_rate": 7.985742328314279e-06, - "loss": 0.7053, + "epoch": 0.21, + "grad_norm": 2.373752084948341, + "learning_rate": 9.137410649778683e-06, + "loss": 0.5667, "step": 3017 }, { - "epoch": 0.32, - "grad_norm": 2.1959613312211084, - "learning_rate": 7.984375133089038e-06, - "loss": 0.637, + "epoch": 0.21, + "grad_norm": 1.5920221641691443, + "learning_rate": 9.136765292389964e-06, + "loss": 0.6153, "step": 3018 }, { - "epoch": 0.32, - "grad_norm": 9.895723607078637, - "learning_rate": 7.98300759114772e-06, - "loss": 0.7098, + "epoch": 0.21, + "grad_norm": 1.5521995996842464, + "learning_rate": 9.136119716483282e-06, + "loss": 0.5316, "step": 3019 }, { - "epoch": 0.32, - "grad_norm": 2.234596285844836, - "learning_rate": 7.981639702649204e-06, - "loss": 0.6282, + "epoch": 0.21, + "grad_norm": 1.7468828635103204, + "learning_rate": 9.135473922092743e-06, + "loss": 0.5768, "step": 3020 }, { - "epoch": 0.32, - "grad_norm": 2.2374506416442133, - "learning_rate": 7.980271467752405e-06, - "loss": 0.6773, + "epoch": 0.21, + "grad_norm": 1.9542218394067772, + "learning_rate": 9.134827909252459e-06, + "loss": 0.5349, "step": 3021 }, { - "epoch": 0.32, - "grad_norm": 2.6097297477113326, - "learning_rate": 7.97890288661628e-06, - "loss": 0.7456, + "epoch": 0.21, + "grad_norm": 1.7371380364882352, + "learning_rate": 9.134181677996555e-06, + "loss": 0.5789, "step": 3022 }, { - "epoch": 0.32, - "grad_norm": 2.8225474093932426, - "learning_rate": 7.977533959399833e-06, - "loss": 0.68, + "epoch": 0.21, + "grad_norm": 1.6346140635644437, + "learning_rate": 9.133535228359163e-06, + "loss": 0.5584, "step": 3023 }, { - "epoch": 0.32, - "grad_norm": 2.6648211533685795, - "learning_rate": 7.976164686262096e-06, - "loss": 0.686, + "epoch": 0.21, + "grad_norm": 2.0171674204507517, + "learning_rate": 9.132888560374437e-06, + "loss": 0.5166, "step": 3024 }, { - "epoch": 0.32, - "grad_norm": 2.3486556822537206, - "learning_rate": 7.974795067362148e-06, - "loss": 0.7185, + "epoch": 0.21, + "grad_norm": 2.1916088877102986, + "learning_rate": 9.132241674076532e-06, + "loss": 0.5822, "step": 3025 }, { - "epoch": 0.32, - "grad_norm": 2.529824254487311, - "learning_rate": 7.97342510285911e-06, - "loss": 0.6447, + "epoch": 0.21, + "grad_norm": 2.0843796049119203, + "learning_rate": 9.131594569499618e-06, + "loss": 0.5895, "step": 3026 }, { - "epoch": 0.32, - "grad_norm": 2.7391077394652763, - "learning_rate": 7.972054792912138e-06, - "loss": 0.6169, + "epoch": 0.21, + "grad_norm": 1.9325327916467356, + "learning_rate": 9.130947246677881e-06, + "loss": 0.5577, "step": 3027 }, { - "epoch": 0.32, - "grad_norm": 2.237331937538726, - "learning_rate": 7.970684137680431e-06, - "loss": 0.6488, + "epoch": 0.21, + "grad_norm": 1.7191324186902104, + "learning_rate": 9.130299705645513e-06, + "loss": 0.594, "step": 3028 }, { - "epoch": 0.32, - "grad_norm": 2.2823597001812614, - "learning_rate": 7.969313137323228e-06, - "loss": 0.6577, + "epoch": 0.21, + "grad_norm": 1.8799810582389074, + "learning_rate": 9.12965194643672e-06, + "loss": 0.4938, "step": 3029 }, { - "epoch": 0.32, - "grad_norm": 2.6046928602923143, - "learning_rate": 7.96794179199981e-06, - "loss": 0.682, + "epoch": 0.22, + "grad_norm": 10.228057749373589, + "learning_rate": 9.129003969085716e-06, + "loss": 0.6013, "step": 3030 }, { - "epoch": 0.32, - "grad_norm": 3.018838373919387, - "learning_rate": 7.966570101869494e-06, - "loss": 0.6314, + "epoch": 0.22, + "grad_norm": 1.8828906269914918, + "learning_rate": 9.128355773626732e-06, + "loss": 0.5556, "step": 3031 }, { - "epoch": 0.32, - "grad_norm": 3.190243611941645, - "learning_rate": 7.965198067091637e-06, - "loss": 0.6224, + "epoch": 0.22, + "grad_norm": 1.3341773231749388, + "learning_rate": 9.127707360094007e-06, + "loss": 0.5167, "step": 3032 }, { - "epoch": 0.32, - "grad_norm": 2.4033899287674183, - "learning_rate": 7.96382568782564e-06, - "loss": 0.6538, + "epoch": 0.22, + "grad_norm": 1.5680382447268233, + "learning_rate": 9.127058728521794e-06, + "loss": 0.52, "step": 3033 }, { - "epoch": 0.32, - "grad_norm": 2.57052475740044, - "learning_rate": 7.962452964230944e-06, - "loss": 0.6519, + "epoch": 0.22, + "grad_norm": 2.270468291645423, + "learning_rate": 9.126409878944354e-06, + "loss": 0.5164, "step": 3034 }, { - "epoch": 0.32, - "grad_norm": 3.0579037048889752, - "learning_rate": 7.961079896467025e-06, - "loss": 0.5823, + "epoch": 0.22, + "grad_norm": 1.68530375471036, + "learning_rate": 9.125760811395961e-06, + "loss": 0.5454, "step": 3035 }, { - "epoch": 0.32, - "grad_norm": 2.8012393029432645, - "learning_rate": 7.959706484693405e-06, - "loss": 0.6989, + "epoch": 0.22, + "grad_norm": 1.5971411973859018, + "learning_rate": 9.125111525910902e-06, + "loss": 0.5774, "step": 3036 }, { - "epoch": 0.32, - "grad_norm": 2.7558779760997525, - "learning_rate": 7.95833272906964e-06, - "loss": 0.6581, + "epoch": 0.22, + "grad_norm": 1.680310404666657, + "learning_rate": 9.124462022523475e-06, + "loss": 0.5864, "step": 3037 }, { - "epoch": 0.32, - "grad_norm": 2.1874011393784665, - "learning_rate": 7.95695862975533e-06, - "loss": 0.6699, + "epoch": 0.22, + "grad_norm": 1.782543093403225, + "learning_rate": 9.12381230126799e-06, + "loss": 0.5929, "step": 3038 }, { - "epoch": 0.32, - "grad_norm": 2.6164914345320107, - "learning_rate": 7.955584186910115e-06, - "loss": 0.7054, + "epoch": 0.22, + "grad_norm": 1.7694127654386875, + "learning_rate": 9.123162362178763e-06, + "loss": 0.5924, "step": 3039 }, { - "epoch": 0.32, - "grad_norm": 3.0292864485504594, - "learning_rate": 7.954209400693673e-06, - "loss": 0.6537, + "epoch": 0.22, + "grad_norm": 1.7572762086094833, + "learning_rate": 9.122512205290131e-06, + "loss": 0.6054, "step": 3040 }, { - "epoch": 0.32, - "grad_norm": 2.905511291216553, - "learning_rate": 7.95283427126572e-06, - "loss": 0.6966, + "epoch": 0.22, + "grad_norm": 1.8324623581471517, + "learning_rate": 9.121861830636434e-06, + "loss": 0.5614, "step": 3041 }, { - "epoch": 0.32, - "grad_norm": 1.208663400211376, - "learning_rate": 7.95145879878602e-06, - "loss": 0.5702, + "epoch": 0.22, + "grad_norm": 1.8773230770202354, + "learning_rate": 9.121211238252028e-06, + "loss": 0.6082, "step": 3042 }, { - "epoch": 0.32, - "grad_norm": 2.559598093856362, - "learning_rate": 7.950082983414367e-06, - "loss": 0.6517, + "epoch": 0.22, + "grad_norm": 1.7622687320156118, + "learning_rate": 9.120560428171281e-06, + "loss": 0.5844, "step": 3043 }, { - "epoch": 0.32, - "grad_norm": 2.389487342933906, - "learning_rate": 7.948706825310601e-06, - "loss": 0.6289, + "epoch": 0.22, + "grad_norm": 0.8691041338728247, + "learning_rate": 9.11990940042857e-06, + "loss": 0.4735, "step": 3044 }, { - "epoch": 0.32, - "grad_norm": 2.675926284212583, - "learning_rate": 7.947330324634601e-06, - "loss": 0.6088, + "epoch": 0.22, + "grad_norm": 1.637574757987552, + "learning_rate": 9.119258155058284e-06, + "loss": 0.6148, "step": 3045 }, { - "epoch": 0.32, - "grad_norm": 2.648358995913935, - "learning_rate": 7.945953481546282e-06, - "loss": 0.6291, + "epoch": 0.22, + "grad_norm": 1.4950484904436516, + "learning_rate": 9.118606692094823e-06, + "loss": 0.5578, "step": 3046 }, { - "epoch": 0.32, - "grad_norm": 1.2023556963692583, - "learning_rate": 7.944576296205603e-06, - "loss": 0.5934, + "epoch": 0.22, + "grad_norm": 3.1753274639163718, + "learning_rate": 9.117955011572601e-06, + "loss": 0.5191, "step": 3047 }, { - "epoch": 0.32, - "grad_norm": 2.627906298885444, - "learning_rate": 7.943198768772565e-06, - "loss": 0.743, + "epoch": 0.22, + "grad_norm": 1.5911642832126338, + "learning_rate": 9.117303113526044e-06, + "loss": 0.5152, "step": 3048 }, { - "epoch": 0.32, - "grad_norm": 3.399467853821857, - "learning_rate": 7.9418208994072e-06, - "loss": 0.6587, + "epoch": 0.22, + "grad_norm": 1.7274471061849384, + "learning_rate": 9.116650997989584e-06, + "loss": 0.5988, "step": 3049 }, { - "epoch": 0.32, - "grad_norm": 2.6749330583941258, - "learning_rate": 7.940442688269587e-06, - "loss": 0.6825, + "epoch": 0.22, + "grad_norm": 1.7384893576992186, + "learning_rate": 9.115998664997667e-06, + "loss": 0.5839, "step": 3050 }, { - "epoch": 0.32, - "grad_norm": 2.9142489850897517, - "learning_rate": 7.939064135519844e-06, - "loss": 0.6535, + "epoch": 0.22, + "grad_norm": 3.744277285075216, + "learning_rate": 9.115346114584757e-06, + "loss": 0.6705, "step": 3051 }, { - "epoch": 0.32, - "grad_norm": 2.3087341250267763, - "learning_rate": 7.937685241318122e-06, - "loss": 0.6461, + "epoch": 0.22, + "grad_norm": 2.966783849680409, + "learning_rate": 9.11469334678532e-06, + "loss": 0.5768, "step": 3052 }, { - "epoch": 0.32, - "grad_norm": 6.292432896926418, - "learning_rate": 7.936306005824624e-06, - "loss": 0.6497, + "epoch": 0.22, + "grad_norm": 1.876745539937525, + "learning_rate": 9.114040361633836e-06, + "loss": 0.4991, "step": 3053 }, { - "epoch": 0.32, - "grad_norm": 3.3219986590091435, - "learning_rate": 7.93492642919958e-06, - "loss": 0.6326, + "epoch": 0.22, + "grad_norm": 1.7567659054278457, + "learning_rate": 9.1133871591648e-06, + "loss": 0.462, "step": 3054 }, { - "epoch": 0.32, - "grad_norm": 3.119470589054828, - "learning_rate": 7.933546511603269e-06, - "loss": 0.6417, + "epoch": 0.22, + "grad_norm": 1.7899616000209557, + "learning_rate": 9.112733739412717e-06, + "loss": 0.6415, "step": 3055 }, { - "epoch": 0.32, - "grad_norm": 6.774960844710111, - "learning_rate": 7.932166253196004e-06, - "loss": 0.5901, + "epoch": 0.22, + "grad_norm": 1.830926609022795, + "learning_rate": 9.112080102412101e-06, + "loss": 0.6188, "step": 3056 }, { - "epoch": 0.32, - "grad_norm": 2.4456975659415847, - "learning_rate": 7.93078565413814e-06, - "loss": 0.6138, + "epoch": 0.22, + "grad_norm": 2.1085799532131833, + "learning_rate": 9.111426248197484e-06, + "loss": 0.5691, "step": 3057 }, { - "epoch": 0.32, - "grad_norm": 2.8996471024029513, - "learning_rate": 7.92940471459007e-06, - "loss": 0.6084, + "epoch": 0.22, + "grad_norm": 1.7710242467532016, + "learning_rate": 9.110772176803397e-06, + "loss": 0.5637, "step": 3058 }, { - "epoch": 0.32, - "grad_norm": 2.1348588208140797, - "learning_rate": 7.928023434712227e-06, - "loss": 0.6235, + "epoch": 0.22, + "grad_norm": 1.916134213040013, + "learning_rate": 9.110117888264398e-06, + "loss": 0.4841, "step": 3059 }, { - "epoch": 0.32, - "grad_norm": 3.1903589219833037, - "learning_rate": 7.926641814665088e-06, - "loss": 0.6413, + "epoch": 0.22, + "grad_norm": 1.7840681423713853, + "learning_rate": 9.109463382615042e-06, + "loss": 0.5923, "step": 3060 }, { - "epoch": 0.32, - "grad_norm": 2.6063098506456126, - "learning_rate": 7.925259854609162e-06, - "loss": 0.6501, + "epoch": 0.22, + "grad_norm": 2.2195526516796105, + "learning_rate": 9.108808659889906e-06, + "loss": 0.523, "step": 3061 }, { - "epoch": 0.32, - "grad_norm": 7.68937491835249, - "learning_rate": 7.923877554705002e-06, - "loss": 0.632, + "epoch": 0.22, + "grad_norm": 1.8849076598026915, + "learning_rate": 9.108153720123576e-06, + "loss": 0.5638, "step": 3062 }, { - "epoch": 0.32, - "grad_norm": 3.2423734100409196, - "learning_rate": 7.9224949151132e-06, - "loss": 0.7238, + "epoch": 0.22, + "grad_norm": 1.8728901290843416, + "learning_rate": 9.107498563350644e-06, + "loss": 0.6224, "step": 3063 }, { - "epoch": 0.32, - "grad_norm": 2.607658237678769, - "learning_rate": 7.921111935994388e-06, - "loss": 0.6373, + "epoch": 0.22, + "grad_norm": 1.5080919307313716, + "learning_rate": 9.106843189605721e-06, + "loss": 0.5547, "step": 3064 }, { - "epoch": 0.32, - "grad_norm": 2.727385481962859, - "learning_rate": 7.919728617509233e-06, - "loss": 0.6826, + "epoch": 0.22, + "grad_norm": 1.5080909963625597, + "learning_rate": 9.106187598923425e-06, + "loss": 0.6043, "step": 3065 }, { - "epoch": 0.32, - "grad_norm": 2.6366816990101145, - "learning_rate": 7.91834495981845e-06, - "loss": 0.6223, + "epoch": 0.22, + "grad_norm": 1.9581393788959849, + "learning_rate": 9.105531791338384e-06, + "loss": 0.5505, "step": 3066 }, { - "epoch": 0.32, - "grad_norm": 2.388243265491952, - "learning_rate": 7.916960963082783e-06, - "loss": 0.6517, + "epoch": 0.22, + "grad_norm": 1.6855386456156267, + "learning_rate": 9.104875766885245e-06, + "loss": 0.5715, "step": 3067 }, { - "epoch": 0.32, - "grad_norm": 2.384651407591471, - "learning_rate": 7.915576627463024e-06, - "loss": 0.6422, + "epoch": 0.22, + "grad_norm": 1.7009417088530405, + "learning_rate": 9.104219525598657e-06, + "loss": 0.5999, "step": 3068 }, { - "epoch": 0.32, - "grad_norm": 2.315078446495304, - "learning_rate": 7.91419195312e-06, - "loss": 0.7492, + "epoch": 0.22, + "grad_norm": 2.0000097016941893, + "learning_rate": 9.103563067513289e-06, + "loss": 0.5904, "step": 3069 }, { - "epoch": 0.32, - "grad_norm": 2.5016948568104214, - "learning_rate": 7.91280694021458e-06, - "loss": 0.5617, + "epoch": 0.22, + "grad_norm": 2.0738803553262453, + "learning_rate": 9.102906392663812e-06, + "loss": 0.5837, "step": 3070 }, { - "epoch": 0.32, - "grad_norm": 2.267739297104988, - "learning_rate": 7.91142158890767e-06, - "loss": 0.6419, + "epoch": 0.22, + "grad_norm": 1.7028827670068771, + "learning_rate": 9.102249501084918e-06, + "loss": 0.6051, "step": 3071 }, { - "epoch": 0.32, - "grad_norm": 2.9294081180768607, - "learning_rate": 7.910035899360215e-06, - "loss": 0.6419, + "epoch": 0.22, + "grad_norm": 1.8530091896005512, + "learning_rate": 9.101592392811307e-06, + "loss": 0.6064, "step": 3072 }, { - "epoch": 0.32, - "grad_norm": 2.5533958607494083, - "learning_rate": 7.908649871733202e-06, - "loss": 0.6337, + "epoch": 0.22, + "grad_norm": 2.1651244429805545, + "learning_rate": 9.100935067877685e-06, + "loss": 0.5033, "step": 3073 }, { - "epoch": 0.32, - "grad_norm": 3.016887449204389, - "learning_rate": 7.907263506187655e-06, - "loss": 0.577, + "epoch": 0.22, + "grad_norm": 1.6567172547293063, + "learning_rate": 9.100277526318779e-06, + "loss": 0.5601, "step": 3074 }, { - "epoch": 0.32, - "grad_norm": 2.229569058131735, - "learning_rate": 7.905876802884639e-06, - "loss": 0.6829, + "epoch": 0.22, + "grad_norm": 2.1674195417716895, + "learning_rate": 9.099619768169318e-06, + "loss": 0.5464, "step": 3075 }, { - "epoch": 0.32, - "grad_norm": 3.650463843334712, - "learning_rate": 7.904489761985254e-06, - "loss": 0.6764, + "epoch": 0.22, + "grad_norm": 1.7140369806289208, + "learning_rate": 9.098961793464051e-06, + "loss": 0.5528, "step": 3076 }, { - "epoch": 0.32, - "grad_norm": 2.5592076303986047, - "learning_rate": 7.903102383650645e-06, - "loss": 0.623, + "epoch": 0.22, + "grad_norm": 1.7321959108250378, + "learning_rate": 9.098303602237734e-06, + "loss": 0.4875, "step": 3077 }, { - "epoch": 0.32, - "grad_norm": 5.032748852875668, - "learning_rate": 7.901714668041993e-06, - "loss": 0.6891, + "epoch": 0.22, + "grad_norm": 2.3027362519402357, + "learning_rate": 9.097645194525132e-06, + "loss": 0.5923, "step": 3078 }, { - "epoch": 0.32, - "grad_norm": 2.317465920800203, - "learning_rate": 7.90032661532052e-06, - "loss": 0.6487, + "epoch": 0.22, + "grad_norm": 3.5778195118329523, + "learning_rate": 9.096986570361025e-06, + "loss": 0.6187, "step": 3079 }, { - "epoch": 0.32, - "grad_norm": 2.305967746812263, - "learning_rate": 7.898938225647484e-06, - "loss": 0.6823, + "epoch": 0.22, + "grad_norm": 1.5452349765821802, + "learning_rate": 9.096327729780208e-06, + "loss": 0.509, "step": 3080 }, { - "epoch": 0.32, - "grad_norm": 2.8884399615654206, - "learning_rate": 7.897549499184184e-06, - "loss": 0.6296, + "epoch": 0.22, + "grad_norm": 1.7513362263263774, + "learning_rate": 9.09566867281748e-06, + "loss": 0.475, "step": 3081 }, { - "epoch": 0.32, - "grad_norm": 3.0770385988445907, - "learning_rate": 7.896160436091961e-06, - "loss": 0.6943, + "epoch": 0.22, + "grad_norm": 1.6869460697049827, + "learning_rate": 9.095009399507651e-06, + "loss": 0.5845, "step": 3082 }, { - "epoch": 0.32, - "grad_norm": 2.4056547215114055, - "learning_rate": 7.894771036532189e-06, - "loss": 0.5929, + "epoch": 0.22, + "grad_norm": 2.1525324766876475, + "learning_rate": 9.094349909885553e-06, + "loss": 0.6148, "step": 3083 }, { - "epoch": 0.32, - "grad_norm": 2.4323552445732304, - "learning_rate": 7.893381300666287e-06, - "loss": 0.6887, + "epoch": 0.22, + "grad_norm": 1.4732447370896398, + "learning_rate": 9.093690203986018e-06, + "loss": 0.5398, "step": 3084 }, { - "epoch": 0.32, - "grad_norm": 2.343998812827096, - "learning_rate": 7.89199122865571e-06, - "loss": 0.6709, + "epoch": 0.22, + "grad_norm": 2.121187467243923, + "learning_rate": 9.093030281843896e-06, + "loss": 0.5999, "step": 3085 }, { - "epoch": 0.32, - "grad_norm": 3.0146219044287164, - "learning_rate": 7.89060082066195e-06, - "loss": 0.6724, + "epoch": 0.22, + "grad_norm": 1.7718578575274462, + "learning_rate": 9.092370143494043e-06, + "loss": 0.6064, "step": 3086 }, { - "epoch": 0.32, - "grad_norm": 2.6033290148752357, - "learning_rate": 7.889210076846544e-06, - "loss": 0.6815, + "epoch": 0.22, + "grad_norm": 1.6220264694283442, + "learning_rate": 9.091709788971335e-06, + "loss": 0.5375, "step": 3087 }, { - "epoch": 0.32, - "grad_norm": 2.381411272217176, - "learning_rate": 7.887818997371062e-06, - "loss": 0.6404, + "epoch": 0.22, + "grad_norm": 0.9744193033401805, + "learning_rate": 9.09104921831065e-06, + "loss": 0.4611, "step": 3088 }, { - "epoch": 0.33, - "grad_norm": 2.904456744874637, - "learning_rate": 7.886427582397117e-06, - "loss": 0.7417, + "epoch": 0.22, + "grad_norm": 1.5289811049002087, + "learning_rate": 9.090388431546882e-06, + "loss": 0.6079, "step": 3089 }, { - "epoch": 0.33, - "grad_norm": 2.2786773692260547, - "learning_rate": 7.88503583208636e-06, - "loss": 0.5864, + "epoch": 0.22, + "grad_norm": 1.6565973319460905, + "learning_rate": 9.089727428714938e-06, + "loss": 0.575, "step": 3090 }, { - "epoch": 0.33, - "grad_norm": 2.81932535707139, - "learning_rate": 7.88364374660048e-06, - "loss": 0.7001, + "epoch": 0.22, + "grad_norm": 1.6525236450410181, + "learning_rate": 9.089066209849734e-06, + "loss": 0.627, "step": 3091 }, { - "epoch": 0.33, - "grad_norm": 2.6849729938128997, - "learning_rate": 7.882251326101205e-06, - "loss": 0.6875, + "epoch": 0.22, + "grad_norm": 1.4995656755075468, + "learning_rate": 9.088404774986197e-06, + "loss": 0.5593, "step": 3092 }, { - "epoch": 0.33, - "grad_norm": 3.3310488217574488, - "learning_rate": 7.8808585707503e-06, - "loss": 0.698, + "epoch": 0.22, + "grad_norm": 1.7881781445518559, + "learning_rate": 9.087743124159265e-06, + "loss": 0.5466, "step": 3093 }, { - "epoch": 0.33, - "grad_norm": 2.6002798711965975, - "learning_rate": 7.879465480709577e-06, - "loss": 0.6669, + "epoch": 0.22, + "grad_norm": 1.7028781818879257, + "learning_rate": 9.087081257403891e-06, + "loss": 0.5726, "step": 3094 }, { - "epoch": 0.33, - "grad_norm": 2.8876056113874142, - "learning_rate": 7.878072056140878e-06, - "loss": 0.6834, + "epoch": 0.22, + "grad_norm": 5.093367862754871, + "learning_rate": 9.086419174755037e-06, + "loss": 0.5511, "step": 3095 }, { - "epoch": 0.33, - "grad_norm": 3.537815529224319, - "learning_rate": 7.876678297206086e-06, - "loss": 0.7348, + "epoch": 0.22, + "grad_norm": 2.0645638652715963, + "learning_rate": 9.085756876247673e-06, + "loss": 0.6291, "step": 3096 }, { - "epoch": 0.33, - "grad_norm": 2.8620170280847104, - "learning_rate": 7.875284204067127e-06, - "loss": 0.6779, + "epoch": 0.22, + "grad_norm": 1.6252950775012807, + "learning_rate": 9.08509436191679e-06, + "loss": 0.5694, "step": 3097 }, { - "epoch": 0.33, - "grad_norm": 3.283985994119008, - "learning_rate": 7.873889776885959e-06, - "loss": 0.6443, + "epoch": 0.22, + "grad_norm": 1.9635615007090768, + "learning_rate": 9.084431631797376e-06, + "loss": 0.5548, "step": 3098 }, { - "epoch": 0.33, - "grad_norm": 3.3105599250764297, - "learning_rate": 7.872495015824586e-06, - "loss": 0.6744, + "epoch": 0.22, + "grad_norm": 0.8718469414504493, + "learning_rate": 9.083768685924447e-06, + "loss": 0.4481, "step": 3099 }, { - "epoch": 0.33, - "grad_norm": 2.9406649266002853, - "learning_rate": 7.871099921045042e-06, - "loss": 0.6953, + "epoch": 0.22, + "grad_norm": 1.5713198604391758, + "learning_rate": 9.083105524333017e-06, + "loss": 0.571, "step": 3100 }, { - "epoch": 0.33, - "grad_norm": 2.574644681179438, - "learning_rate": 7.869704492709412e-06, - "loss": 0.6711, + "epoch": 0.22, + "grad_norm": 1.727830394815153, + "learning_rate": 9.082442147058118e-06, + "loss": 0.548, "step": 3101 }, { - "epoch": 0.33, - "grad_norm": 2.449965941126261, - "learning_rate": 7.868308730979809e-06, - "loss": 0.628, + "epoch": 0.22, + "grad_norm": 2.1173326917806565, + "learning_rate": 9.081778554134792e-06, + "loss": 0.6017, "step": 3102 }, { - "epoch": 0.33, - "grad_norm": 2.772397515441643, - "learning_rate": 7.866912636018389e-06, - "loss": 0.7371, + "epoch": 0.22, + "grad_norm": 1.5990508472947116, + "learning_rate": 9.081114745598089e-06, + "loss": 0.6131, "step": 3103 }, { - "epoch": 0.33, - "grad_norm": 2.39442828453295, - "learning_rate": 7.865516207987344e-06, - "loss": 0.63, + "epoch": 0.22, + "grad_norm": 1.8748652438810725, + "learning_rate": 9.08045072148308e-06, + "loss": 0.5421, "step": 3104 }, { - "epoch": 0.33, - "grad_norm": 3.3246795963005713, - "learning_rate": 7.864119447048912e-06, - "loss": 0.7073, + "epoch": 0.22, + "grad_norm": 1.9680056113957995, + "learning_rate": 9.079786481824837e-06, + "loss": 0.5655, "step": 3105 }, { - "epoch": 0.33, - "grad_norm": 3.0064966422057773, - "learning_rate": 7.862722353365361e-06, - "loss": 0.7265, + "epoch": 0.22, + "grad_norm": 1.5654685817106608, + "learning_rate": 9.079122026658447e-06, + "loss": 0.5789, "step": 3106 }, { - "epoch": 0.33, - "grad_norm": 3.0807358878776006, - "learning_rate": 7.861324927099004e-06, - "loss": 0.7145, + "epoch": 0.22, + "grad_norm": 1.4785686646714231, + "learning_rate": 9.078457356019009e-06, + "loss": 0.5951, "step": 3107 }, { - "epoch": 0.33, - "grad_norm": 1.069135073025866, - "learning_rate": 7.859927168412186e-06, - "loss": 0.6135, + "epoch": 0.22, + "grad_norm": 1.7930021746256048, + "learning_rate": 9.077792469941634e-06, + "loss": 0.5721, "step": 3108 }, { - "epoch": 0.33, - "grad_norm": 3.286312915224411, - "learning_rate": 7.858529077467298e-06, - "loss": 0.6363, + "epoch": 0.22, + "grad_norm": 1.8132628413020246, + "learning_rate": 9.077127368461443e-06, + "loss": 0.5646, "step": 3109 }, { - "epoch": 0.33, - "grad_norm": 2.2389602662700385, - "learning_rate": 7.857130654426764e-06, - "loss": 0.6131, + "epoch": 0.22, + "grad_norm": 2.008328865431607, + "learning_rate": 9.076462051613571e-06, + "loss": 0.5708, "step": 3110 }, { - "epoch": 0.33, - "grad_norm": 3.36015735420768, - "learning_rate": 7.85573189945305e-06, - "loss": 0.5841, + "epoch": 0.22, + "grad_norm": 1.9843715015650085, + "learning_rate": 9.07579651943316e-06, + "loss": 0.5827, "step": 3111 }, { - "epoch": 0.33, - "grad_norm": 2.7462815401691096, - "learning_rate": 7.854332812708661e-06, - "loss": 0.6193, + "epoch": 0.22, + "grad_norm": 1.6756450947493984, + "learning_rate": 9.075130771955363e-06, + "loss": 0.5647, "step": 3112 }, { - "epoch": 0.33, - "grad_norm": 3.0603590845258815, - "learning_rate": 7.852933394356134e-06, - "loss": 0.7154, + "epoch": 0.22, + "grad_norm": 1.6676190701104636, + "learning_rate": 9.074464809215353e-06, + "loss": 0.5165, "step": 3113 }, { - "epoch": 0.33, - "grad_norm": 2.6664238931594584, - "learning_rate": 7.851533644558054e-06, - "loss": 0.747, + "epoch": 0.22, + "grad_norm": 0.8352266492758975, + "learning_rate": 9.073798631248304e-06, + "loss": 0.4389, "step": 3114 }, { - "epoch": 0.33, - "grad_norm": 2.859101742503454, - "learning_rate": 7.850133563477037e-06, - "loss": 0.6958, + "epoch": 0.22, + "grad_norm": 1.7183609672210136, + "learning_rate": 9.07313223808941e-06, + "loss": 0.5506, "step": 3115 }, { - "epoch": 0.33, - "grad_norm": 2.669270847924662, - "learning_rate": 7.848733151275741e-06, - "loss": 0.5718, + "epoch": 0.22, + "grad_norm": 1.6881205091380187, + "learning_rate": 9.072465629773868e-06, + "loss": 0.5869, "step": 3116 }, { - "epoch": 0.33, - "grad_norm": 3.7219654697356863, - "learning_rate": 7.847332408116863e-06, - "loss": 0.6531, + "epoch": 0.22, + "grad_norm": 2.0244066563487637, + "learning_rate": 9.071798806336891e-06, + "loss": 0.5687, "step": 3117 }, { - "epoch": 0.33, - "grad_norm": 3.7912563805814945, - "learning_rate": 7.845931334163138e-06, - "loss": 0.7386, + "epoch": 0.22, + "grad_norm": 1.8098370941200939, + "learning_rate": 9.071131767813706e-06, + "loss": 0.6398, "step": 3118 }, { - "epoch": 0.33, - "grad_norm": 2.8886193603297348, - "learning_rate": 7.844529929577336e-06, - "loss": 0.6827, + "epoch": 0.22, + "grad_norm": 5.437316812534526, + "learning_rate": 9.070464514239546e-06, + "loss": 0.5939, "step": 3119 }, { - "epoch": 0.33, - "grad_norm": 2.586772606669079, - "learning_rate": 7.84312819452227e-06, - "loss": 0.6084, + "epoch": 0.22, + "grad_norm": 0.9179022188886208, + "learning_rate": 9.069797045649657e-06, + "loss": 0.4554, "step": 3120 }, { - "epoch": 0.33, - "grad_norm": 2.6184892406372193, - "learning_rate": 7.841726129160789e-06, - "loss": 0.6775, + "epoch": 0.22, + "grad_norm": 1.9506358614996155, + "learning_rate": 9.069129362079297e-06, + "loss": 0.5876, "step": 3121 }, { - "epoch": 0.33, - "grad_norm": 2.3983415224825637, - "learning_rate": 7.84032373365578e-06, - "loss": 0.6357, + "epoch": 0.22, + "grad_norm": 5.486386582874587, + "learning_rate": 9.068461463563737e-06, + "loss": 0.5857, "step": 3122 }, { - "epoch": 0.33, - "grad_norm": 3.1697316223767333, - "learning_rate": 7.838921008170171e-06, - "loss": 0.7038, + "epoch": 0.22, + "grad_norm": 1.5037629719161791, + "learning_rate": 9.067793350138256e-06, + "loss": 0.5586, "step": 3123 }, { - "epoch": 0.33, - "grad_norm": 2.538074643605018, - "learning_rate": 7.837517952866924e-06, - "loss": 0.6816, + "epoch": 0.22, + "grad_norm": 1.4931773545957066, + "learning_rate": 9.067125021838148e-06, + "loss": 0.4816, "step": 3124 }, { - "epoch": 0.33, - "grad_norm": 2.723960615531358, - "learning_rate": 7.836114567909046e-06, - "loss": 0.6351, + "epoch": 0.22, + "grad_norm": 1.7887614862282148, + "learning_rate": 9.066456478698713e-06, + "loss": 0.547, "step": 3125 }, { - "epoch": 0.33, - "grad_norm": 4.3141201817543715, - "learning_rate": 7.834710853459575e-06, - "loss": 0.6886, + "epoch": 0.22, + "grad_norm": 1.6005205231940767, + "learning_rate": 9.06578772075527e-06, + "loss": 0.5714, "step": 3126 }, { - "epoch": 0.33, - "grad_norm": 6.935659578134951, - "learning_rate": 7.833306809681593e-06, - "loss": 0.6418, + "epoch": 0.22, + "grad_norm": 1.7270619725929421, + "learning_rate": 9.06511874804314e-06, + "loss": 0.5639, "step": 3127 }, { - "epoch": 0.33, - "grad_norm": 2.8856037758667292, - "learning_rate": 7.831902436738215e-06, - "loss": 0.6849, + "epoch": 0.22, + "grad_norm": 1.654115368339888, + "learning_rate": 9.064449560597665e-06, + "loss": 0.6503, "step": 3128 }, { - "epoch": 0.33, - "grad_norm": 8.563709509210556, - "learning_rate": 7.830497734792597e-06, - "loss": 0.6117, + "epoch": 0.22, + "grad_norm": 2.0449235359035933, + "learning_rate": 9.063780158454192e-06, + "loss": 0.6487, "step": 3129 }, { - "epoch": 0.33, - "grad_norm": 2.927718266986384, - "learning_rate": 7.829092704007935e-06, - "loss": 0.6543, + "epoch": 0.22, + "grad_norm": 1.5104037662690675, + "learning_rate": 9.063110541648082e-06, + "loss": 0.5165, "step": 3130 }, { - "epoch": 0.33, - "grad_norm": 2.675602881652366, - "learning_rate": 7.827687344547459e-06, - "loss": 0.6443, + "epoch": 0.22, + "grad_norm": 1.5848126807708425, + "learning_rate": 9.062440710214705e-06, + "loss": 0.5708, "step": 3131 }, { - "epoch": 0.33, - "grad_norm": 2.732964806627827, - "learning_rate": 7.826281656574444e-06, - "loss": 0.6704, + "epoch": 0.22, + "grad_norm": 1.736122500201738, + "learning_rate": 9.061770664189442e-06, + "loss": 0.5551, "step": 3132 }, { - "epoch": 0.33, - "grad_norm": 2.9065789120991763, - "learning_rate": 7.824875640252195e-06, - "loss": 0.7202, + "epoch": 0.22, + "grad_norm": 3.367558166958934, + "learning_rate": 9.061100403607691e-06, + "loss": 0.6132, "step": 3133 }, { - "epoch": 0.33, - "grad_norm": 2.5054335560181444, - "learning_rate": 7.82346929574406e-06, - "loss": 0.7212, + "epoch": 0.22, + "grad_norm": 1.654540643172597, + "learning_rate": 9.060429928504857e-06, + "loss": 0.6636, "step": 3134 }, { - "epoch": 0.33, - "grad_norm": 2.5085507480711065, - "learning_rate": 7.822062623213424e-06, - "loss": 0.7014, + "epoch": 0.22, + "grad_norm": 1.6438141788245617, + "learning_rate": 9.059759238916353e-06, + "loss": 0.5702, "step": 3135 }, { - "epoch": 0.33, - "grad_norm": 8.434812127737116, - "learning_rate": 7.820655622823712e-06, - "loss": 0.6936, + "epoch": 0.22, + "grad_norm": 1.7403698333898585, + "learning_rate": 9.059088334877611e-06, + "loss": 0.582, "step": 3136 }, { - "epoch": 0.33, - "grad_norm": 7.281720661318549, - "learning_rate": 7.819248294738381e-06, - "loss": 0.6299, + "epoch": 0.22, + "grad_norm": 1.6329809783966027, + "learning_rate": 9.05841721642407e-06, + "loss": 0.5948, "step": 3137 }, { - "epoch": 0.33, - "grad_norm": 3.336679866157924, - "learning_rate": 7.817840639120932e-06, - "loss": 0.6982, + "epoch": 0.22, + "grad_norm": 2.0586932552627926, + "learning_rate": 9.057745883591178e-06, + "loss": 0.5378, "step": 3138 }, { - "epoch": 0.33, - "grad_norm": 2.7369577275240657, - "learning_rate": 7.816432656134907e-06, - "loss": 0.6836, + "epoch": 0.22, + "grad_norm": 1.977287579514969, + "learning_rate": 9.0570743364144e-06, + "loss": 0.5554, "step": 3139 }, { - "epoch": 0.33, - "grad_norm": 3.408523586518461, - "learning_rate": 7.815024345943874e-06, - "loss": 0.6304, + "epoch": 0.22, + "grad_norm": 1.5794454937333202, + "learning_rate": 9.05640257492921e-06, + "loss": 0.5125, "step": 3140 }, { - "epoch": 0.33, - "grad_norm": 2.7339532046846244, - "learning_rate": 7.81361570871145e-06, - "loss": 0.6901, + "epoch": 0.22, + "grad_norm": 1.596614052956998, + "learning_rate": 9.055730599171089e-06, + "loss": 0.554, "step": 3141 }, { - "epoch": 0.33, - "grad_norm": 2.55643397634351, - "learning_rate": 7.812206744601288e-06, - "loss": 0.7034, + "epoch": 0.22, + "grad_norm": 1.8867744802359427, + "learning_rate": 9.055058409175534e-06, + "loss": 0.5656, "step": 3142 }, { - "epoch": 0.33, - "grad_norm": 2.7516095557400058, - "learning_rate": 7.810797453777076e-06, - "loss": 0.5308, + "epoch": 0.22, + "grad_norm": 1.7118194274901917, + "learning_rate": 9.054386004978056e-06, + "loss": 0.5474, "step": 3143 }, { - "epoch": 0.33, - "grad_norm": 2.5846496687334355, - "learning_rate": 7.80938783640254e-06, - "loss": 0.7117, + "epoch": 0.22, + "grad_norm": 0.8479823257073037, + "learning_rate": 9.053713386614169e-06, + "loss": 0.4517, "step": 3144 }, { - "epoch": 0.33, - "grad_norm": 2.8518803010333, - "learning_rate": 7.807977892641446e-06, - "loss": 0.7384, + "epoch": 0.22, + "grad_norm": 1.8656134619377513, + "learning_rate": 9.053040554119405e-06, + "loss": 0.6026, "step": 3145 }, { - "epoch": 0.33, - "grad_norm": 3.978801136142066, - "learning_rate": 7.806567622657598e-06, - "loss": 0.6169, + "epoch": 0.22, + "grad_norm": 1.5209063837707966, + "learning_rate": 9.052367507529307e-06, + "loss": 0.5906, "step": 3146 }, { - "epoch": 0.33, - "grad_norm": 3.8381379717790853, - "learning_rate": 7.805157026614836e-06, - "loss": 0.6696, + "epoch": 0.22, + "grad_norm": 2.261674115424456, + "learning_rate": 9.051694246879425e-06, + "loss": 0.5342, "step": 3147 }, { - "epoch": 0.33, - "grad_norm": 2.6451501046738706, - "learning_rate": 7.80374610467704e-06, - "loss": 0.695, + "epoch": 0.22, + "grad_norm": 1.988044036841759, + "learning_rate": 9.051020772205323e-06, + "loss": 0.4756, "step": 3148 }, { - "epoch": 0.33, - "grad_norm": 2.6556926713393936, - "learning_rate": 7.802334857008127e-06, - "loss": 0.5896, + "epoch": 0.22, + "grad_norm": 1.4624886894755698, + "learning_rate": 9.050347083542579e-06, + "loss": 0.5502, "step": 3149 }, { - "epoch": 0.33, - "grad_norm": 2.3511219203796054, - "learning_rate": 7.800923283772051e-06, - "loss": 0.6078, + "epoch": 0.22, + "grad_norm": 1.879854095228839, + "learning_rate": 9.049673180926776e-06, + "loss": 0.5773, "step": 3150 }, { - "epoch": 0.33, - "grad_norm": 2.3055445730449935, - "learning_rate": 7.799511385132803e-06, - "loss": 0.7246, + "epoch": 0.22, + "grad_norm": 0.8808234635941111, + "learning_rate": 9.048999064393515e-06, + "loss": 0.4609, "step": 3151 }, { - "epoch": 0.33, - "grad_norm": 2.807571982019713, - "learning_rate": 7.798099161254415e-06, - "loss": 0.5812, + "epoch": 0.22, + "grad_norm": 1.3100963012882794, + "learning_rate": 9.048324733978403e-06, + "loss": 0.5195, "step": 3152 }, { - "epoch": 0.33, - "grad_norm": 2.9073765971064183, - "learning_rate": 7.796686612300957e-06, - "loss": 0.5974, + "epoch": 0.22, + "grad_norm": 1.4767594867468434, + "learning_rate": 9.047650189717059e-06, + "loss": 0.5254, "step": 3153 }, { - "epoch": 0.33, - "grad_norm": 2.580983486608408, - "learning_rate": 7.795273738436531e-06, - "loss": 0.6505, + "epoch": 0.22, + "grad_norm": 1.5961991802631599, + "learning_rate": 9.046975431645118e-06, + "loss": 0.5706, "step": 3154 }, { - "epoch": 0.33, - "grad_norm": 2.27033419980578, - "learning_rate": 7.793860539825282e-06, - "loss": 0.6077, + "epoch": 0.22, + "grad_norm": 1.509472653220536, + "learning_rate": 9.046300459798224e-06, + "loss": 0.5875, "step": 3155 }, { - "epoch": 0.33, - "grad_norm": 2.736113101860133, - "learning_rate": 7.792447016631392e-06, - "loss": 0.5951, + "epoch": 0.22, + "grad_norm": 2.5321711694578064, + "learning_rate": 9.045625274212026e-06, + "loss": 0.6784, "step": 3156 }, { - "epoch": 0.33, - "grad_norm": 2.1064869720748733, - "learning_rate": 7.79103316901908e-06, - "loss": 0.6632, + "epoch": 0.22, + "grad_norm": 1.6907334929537008, + "learning_rate": 9.044949874922193e-06, + "loss": 0.5262, "step": 3157 }, { - "epoch": 0.33, - "grad_norm": 3.0989709229973887, - "learning_rate": 7.789618997152603e-06, - "loss": 0.7188, + "epoch": 0.22, + "grad_norm": 3.356484201048197, + "learning_rate": 9.044274261964402e-06, + "loss": 0.5843, "step": 3158 }, { - "epoch": 0.33, - "grad_norm": 3.06812401144027, - "learning_rate": 7.788204501196255e-06, - "loss": 0.6171, + "epoch": 0.22, + "grad_norm": 0.8543757115051713, + "learning_rate": 9.04359843537434e-06, + "loss": 0.4665, "step": 3159 }, { - "epoch": 0.33, - "grad_norm": 2.691041503384547, - "learning_rate": 7.786789681314368e-06, - "loss": 0.604, + "epoch": 0.22, + "grad_norm": 0.7805994287073599, + "learning_rate": 9.042922395187707e-06, + "loss": 0.4514, "step": 3160 }, { - "epoch": 0.33, - "grad_norm": 2.614511086385431, - "learning_rate": 7.785374537671311e-06, - "loss": 0.6663, + "epoch": 0.22, + "grad_norm": 1.949001517945895, + "learning_rate": 9.042246141440215e-06, + "loss": 0.6188, "step": 3161 }, { - "epoch": 0.33, - "grad_norm": 2.6590381539539054, - "learning_rate": 7.783959070431492e-06, - "loss": 0.7339, + "epoch": 0.22, + "grad_norm": 1.438168256474461, + "learning_rate": 9.041569674167584e-06, + "loss": 0.6087, "step": 3162 }, { - "epoch": 0.33, - "grad_norm": 1.1374038834717708, - "learning_rate": 7.782543279759356e-06, - "loss": 0.5921, + "epoch": 0.22, + "grad_norm": 2.2430618881643043, + "learning_rate": 9.040892993405548e-06, + "loss": 0.5177, "step": 3163 }, { - "epoch": 0.33, - "grad_norm": 2.7699969206806414, - "learning_rate": 7.781127165819386e-06, - "loss": 0.767, + "epoch": 0.22, + "grad_norm": 1.7107059465929517, + "learning_rate": 9.040216099189853e-06, + "loss": 0.5733, "step": 3164 }, { - "epoch": 0.33, - "grad_norm": 2.0646485786942144, - "learning_rate": 7.7797107287761e-06, - "loss": 0.657, + "epoch": 0.22, + "grad_norm": 1.8738235906946177, + "learning_rate": 9.039538991556251e-06, + "loss": 0.5719, "step": 3165 }, { - "epoch": 0.33, - "grad_norm": 3.383226812212046, - "learning_rate": 7.778293968794056e-06, - "loss": 0.6619, + "epoch": 0.22, + "grad_norm": 0.9156006139823603, + "learning_rate": 9.038861670540515e-06, + "loss": 0.4705, "step": 3166 }, { - "epoch": 0.33, - "grad_norm": 2.3946861066654246, - "learning_rate": 7.776876886037852e-06, - "loss": 0.7148, + "epoch": 0.22, + "grad_norm": 1.8856080482980109, + "learning_rate": 9.038184136178418e-06, + "loss": 0.5264, "step": 3167 }, { - "epoch": 0.33, - "grad_norm": 4.2920273098631965, - "learning_rate": 7.775459480672117e-06, - "loss": 0.6964, + "epoch": 0.22, + "grad_norm": 2.0971354295252937, + "learning_rate": 9.037506388505752e-06, + "loss": 0.5696, "step": 3168 }, { - "epoch": 0.33, - "grad_norm": 2.74757852495417, - "learning_rate": 7.774041752861524e-06, - "loss": 0.6485, + "epoch": 0.22, + "grad_norm": 0.7787253738797677, + "learning_rate": 9.036828427558318e-06, + "loss": 0.4745, "step": 3169 }, { - "epoch": 0.33, - "grad_norm": 3.8429087492176057, - "learning_rate": 7.772623702770779e-06, - "loss": 0.7287, + "epoch": 0.22, + "grad_norm": 1.8408295328731465, + "learning_rate": 9.036150253371925e-06, + "loss": 0.5924, "step": 3170 }, { - "epoch": 0.33, - "grad_norm": 2.6463677367731218, - "learning_rate": 7.771205330564626e-06, - "loss": 0.6197, + "epoch": 0.23, + "grad_norm": 1.4882093061664055, + "learning_rate": 9.035471865982403e-06, + "loss": 0.5788, "step": 3171 }, { - "epoch": 0.33, - "grad_norm": 2.3146582536567784, - "learning_rate": 7.769786636407849e-06, - "loss": 0.673, + "epoch": 0.23, + "grad_norm": 1.5899424147773533, + "learning_rate": 9.034793265425581e-06, + "loss": 0.5356, "step": 3172 }, { - "epoch": 0.33, - "grad_norm": 3.097668002633075, - "learning_rate": 7.768367620465267e-06, - "loss": 0.6628, + "epoch": 0.23, + "grad_norm": 2.1798846969232093, + "learning_rate": 9.034114451737308e-06, + "loss": 0.6126, "step": 3173 }, { - "epoch": 0.33, - "grad_norm": 2.6251746945849934, - "learning_rate": 7.766948282901738e-06, - "loss": 0.6698, + "epoch": 0.23, + "grad_norm": 1.6979950496862446, + "learning_rate": 9.03343542495344e-06, + "loss": 0.5724, "step": 3174 }, { - "epoch": 0.33, - "grad_norm": 2.7114682635233325, - "learning_rate": 7.765528623882155e-06, - "loss": 0.6594, + "epoch": 0.23, + "grad_norm": 1.5276463098277349, + "learning_rate": 9.032756185109846e-06, + "loss": 0.5126, "step": 3175 }, { - "epoch": 0.33, - "grad_norm": 3.4612588856168838, - "learning_rate": 7.76410864357145e-06, - "loss": 0.6582, + "epoch": 0.23, + "grad_norm": 1.5902395120542454, + "learning_rate": 9.032076732242402e-06, + "loss": 0.5489, "step": 3176 }, { - "epoch": 0.33, - "grad_norm": 2.874426831691072, - "learning_rate": 7.762688342134597e-06, - "loss": 0.6661, + "epoch": 0.23, + "grad_norm": 1.5175120820650618, + "learning_rate": 9.031397066387007e-06, + "loss": 0.5854, "step": 3177 }, { - "epoch": 0.33, - "grad_norm": 2.9148574564549485, - "learning_rate": 7.761267719736593e-06, - "loss": 0.6976, + "epoch": 0.23, + "grad_norm": 0.9342772166868714, + "learning_rate": 9.030717187579556e-06, + "loss": 0.4609, "step": 3178 }, { - "epoch": 0.33, - "grad_norm": 3.0616632712894885, - "learning_rate": 7.759846776542492e-06, - "loss": 0.6677, + "epoch": 0.23, + "grad_norm": 1.6831787485820295, + "learning_rate": 9.030037095855969e-06, + "loss": 0.6257, "step": 3179 }, { - "epoch": 0.33, - "grad_norm": 3.561508539134428, - "learning_rate": 7.75842551271737e-06, - "loss": 0.6977, + "epoch": 0.23, + "grad_norm": 1.8740556813943692, + "learning_rate": 9.029356791252162e-06, + "loss": 0.5001, "step": 3180 }, { - "epoch": 0.33, - "grad_norm": 3.298240212255653, - "learning_rate": 7.757003928426342e-06, - "loss": 0.6149, + "epoch": 0.23, + "grad_norm": 1.5362067362839642, + "learning_rate": 9.028676273804078e-06, + "loss": 0.5865, "step": 3181 }, { - "epoch": 0.33, - "grad_norm": 2.4188530769511982, - "learning_rate": 7.755582023834572e-06, - "loss": 0.6496, + "epoch": 0.23, + "grad_norm": 1.7996518819584841, + "learning_rate": 9.027995543547663e-06, + "loss": 0.5435, "step": 3182 }, { - "epoch": 0.33, - "grad_norm": 2.8398810101886345, - "learning_rate": 7.754159799107244e-06, - "loss": 0.6351, + "epoch": 0.23, + "grad_norm": 1.7024688883722179, + "learning_rate": 9.027314600518874e-06, + "loss": 0.5781, "step": 3183 }, { - "epoch": 0.34, - "grad_norm": 2.2499424827555927, - "learning_rate": 7.752737254409594e-06, - "loss": 0.7088, + "epoch": 0.23, + "grad_norm": 1.514506012791769, + "learning_rate": 9.026633444753681e-06, + "loss": 0.6121, "step": 3184 }, { - "epoch": 0.34, - "grad_norm": 2.3814957039535374, - "learning_rate": 7.751314389906887e-06, - "loss": 0.6245, + "epoch": 0.23, + "grad_norm": 2.08311835939007, + "learning_rate": 9.025952076288066e-06, + "loss": 0.5907, "step": 3185 }, { - "epoch": 0.34, - "grad_norm": 2.666046394806816, - "learning_rate": 7.749891205764427e-06, - "loss": 0.6133, + "epoch": 0.23, + "grad_norm": 1.8302151715246762, + "learning_rate": 9.02527049515802e-06, + "loss": 0.5682, "step": 3186 }, { - "epoch": 0.34, - "grad_norm": 2.7799564388267295, - "learning_rate": 7.748467702147555e-06, - "loss": 0.605, + "epoch": 0.23, + "grad_norm": 1.5123927115275206, + "learning_rate": 9.024588701399548e-06, + "loss": 0.5855, "step": 3187 }, { - "epoch": 0.34, - "grad_norm": 5.281029160468379, - "learning_rate": 7.747043879221653e-06, - "loss": 0.6321, + "epoch": 0.23, + "grad_norm": 1.6105341535733377, + "learning_rate": 9.023906695048663e-06, + "loss": 0.5223, "step": 3188 }, { - "epoch": 0.34, - "grad_norm": 2.934308558200218, - "learning_rate": 7.745619737152133e-06, - "loss": 0.7053, + "epoch": 0.23, + "grad_norm": 1.8365580546536424, + "learning_rate": 9.023224476141392e-06, + "loss": 0.576, "step": 3189 }, { - "epoch": 0.34, - "grad_norm": 3.1545251666263026, - "learning_rate": 7.744195276104447e-06, - "loss": 0.6998, + "epoch": 0.23, + "grad_norm": 2.7792567424905457, + "learning_rate": 9.022542044713772e-06, + "loss": 0.6261, "step": 3190 }, { - "epoch": 0.34, - "grad_norm": 2.642275030044787, - "learning_rate": 7.742770496244087e-06, - "loss": 0.7263, + "epoch": 0.23, + "grad_norm": 2.0840321674602618, + "learning_rate": 9.021859400801849e-06, + "loss": 0.6255, "step": 3191 }, { - "epoch": 0.34, - "grad_norm": 1.9956469346760177, - "learning_rate": 7.74134539773658e-06, - "loss": 0.6707, + "epoch": 0.23, + "grad_norm": 1.6197389342296826, + "learning_rate": 9.021176544441686e-06, + "loss": 0.56, "step": 3192 }, { - "epoch": 0.34, - "grad_norm": 4.099408512549327, - "learning_rate": 7.73991998074749e-06, - "loss": 0.6832, + "epoch": 0.23, + "grad_norm": 1.668489064341011, + "learning_rate": 9.020493475669351e-06, + "loss": 0.4814, "step": 3193 }, { - "epoch": 0.34, - "grad_norm": 5.473717647736508, - "learning_rate": 7.738494245442415e-06, - "loss": 0.5969, + "epoch": 0.23, + "grad_norm": 2.566645945335119, + "learning_rate": 9.019810194520929e-06, + "loss": 0.5516, "step": 3194 }, { - "epoch": 0.34, - "grad_norm": 2.67068434985646, - "learning_rate": 7.737068191986995e-06, - "loss": 0.7505, + "epoch": 0.23, + "grad_norm": 1.5689433753110227, + "learning_rate": 9.01912670103251e-06, + "loss": 0.5338, "step": 3195 }, { - "epoch": 0.34, - "grad_norm": 2.413731979539875, - "learning_rate": 7.735641820546906e-06, - "loss": 0.7376, + "epoch": 0.23, + "grad_norm": 1.523335427887631, + "learning_rate": 9.018442995240203e-06, + "loss": 0.5333, "step": 3196 }, { - "epoch": 0.34, - "grad_norm": 2.619766771399215, - "learning_rate": 7.73421513128786e-06, - "loss": 0.712, + "epoch": 0.23, + "grad_norm": 1.6807787147791986, + "learning_rate": 9.017759077180117e-06, + "loss": 0.5311, "step": 3197 }, { - "epoch": 0.34, - "grad_norm": 2.0705943862250664, - "learning_rate": 7.7327881243756e-06, - "loss": 0.701, + "epoch": 0.23, + "grad_norm": 1.649720565030237, + "learning_rate": 9.017074946888383e-06, + "loss": 0.5973, "step": 3198 }, { - "epoch": 0.34, - "grad_norm": 2.3139029563356512, - "learning_rate": 7.731360799975916e-06, - "loss": 0.6423, + "epoch": 0.23, + "grad_norm": 2.0174542827834907, + "learning_rate": 9.01639060440114e-06, + "loss": 0.5156, "step": 3199 }, { - "epoch": 0.34, - "grad_norm": 2.4451859469193082, - "learning_rate": 7.72993315825463e-06, - "loss": 0.6318, + "epoch": 0.23, + "grad_norm": 2.127181017777319, + "learning_rate": 9.015706049754536e-06, + "loss": 0.5636, "step": 3200 }, { - "epoch": 0.34, - "grad_norm": 2.2475814172050104, - "learning_rate": 7.728505199377603e-06, - "loss": 0.6631, + "epoch": 0.23, + "grad_norm": 2.7979103044509253, + "learning_rate": 9.015021282984731e-06, + "loss": 0.5667, "step": 3201 }, { - "epoch": 0.34, - "grad_norm": 3.3486986779317154, - "learning_rate": 7.727076923510727e-06, - "loss": 0.7424, + "epoch": 0.23, + "grad_norm": 1.5984173861538533, + "learning_rate": 9.014336304127896e-06, + "loss": 0.4766, "step": 3202 }, { - "epoch": 0.34, - "grad_norm": 2.121154876974287, - "learning_rate": 7.72564833081994e-06, - "loss": 0.6947, + "epoch": 0.23, + "grad_norm": 1.5740663042673293, + "learning_rate": 9.013651113220216e-06, + "loss": 0.6078, "step": 3203 }, { - "epoch": 0.34, - "grad_norm": 3.4236551024113093, - "learning_rate": 7.724219421471206e-06, - "loss": 0.727, + "epoch": 0.23, + "grad_norm": 1.7331960630207204, + "learning_rate": 9.012965710297885e-06, + "loss": 0.5903, "step": 3204 }, { - "epoch": 0.34, - "grad_norm": 2.346428595824038, - "learning_rate": 7.722790195630536e-06, - "loss": 0.6948, + "epoch": 0.23, + "grad_norm": 1.8477794178742395, + "learning_rate": 9.012280095397106e-06, + "loss": 0.6071, "step": 3205 }, { - "epoch": 0.34, - "grad_norm": 3.092820968756066, - "learning_rate": 7.721360653463971e-06, - "loss": 0.6849, + "epoch": 0.23, + "grad_norm": 2.1115818076535753, + "learning_rate": 9.011594268554097e-06, + "loss": 0.5624, "step": 3206 }, { - "epoch": 0.34, - "grad_norm": 2.5447700966964466, - "learning_rate": 7.719930795137592e-06, - "loss": 0.7097, + "epoch": 0.23, + "grad_norm": 1.55228582078478, + "learning_rate": 9.010908229805086e-06, + "loss": 0.574, "step": 3207 }, { - "epoch": 0.34, - "grad_norm": 3.2830897925687825, - "learning_rate": 7.718500620817517e-06, - "loss": 0.6177, + "epoch": 0.23, + "grad_norm": 1.989748746259987, + "learning_rate": 9.01022197918631e-06, + "loss": 0.5657, "step": 3208 }, { - "epoch": 0.34, - "grad_norm": 2.8208598121038104, - "learning_rate": 7.717070130669896e-06, - "loss": 0.6102, + "epoch": 0.23, + "grad_norm": 0.8006187945217477, + "learning_rate": 9.009535516734023e-06, + "loss": 0.4857, "step": 3209 }, { - "epoch": 0.34, - "grad_norm": 2.9927032819623642, - "learning_rate": 7.715639324860925e-06, - "loss": 0.6379, + "epoch": 0.23, + "grad_norm": 0.9356296153936299, + "learning_rate": 9.008848842484482e-06, + "loss": 0.49, "step": 3210 }, { - "epoch": 0.34, - "grad_norm": 2.6289931425920803, - "learning_rate": 7.714208203556825e-06, - "loss": 0.7293, + "epoch": 0.23, + "grad_norm": 0.8320898937708464, + "learning_rate": 9.008161956473962e-06, + "loss": 0.4812, "step": 3211 }, { - "epoch": 0.34, - "grad_norm": 3.899374378134415, - "learning_rate": 7.712776766923862e-06, - "loss": 0.6116, + "epoch": 0.23, + "grad_norm": 1.747559472593972, + "learning_rate": 9.007474858738748e-06, + "loss": 0.6297, "step": 3212 }, { - "epoch": 0.34, - "grad_norm": 3.2414892114848803, - "learning_rate": 7.711345015128335e-06, - "loss": 0.7262, + "epoch": 0.23, + "grad_norm": 2.8690172696322045, + "learning_rate": 9.00678754931513e-06, + "loss": 0.6248, "step": 3213 }, { - "epoch": 0.34, - "grad_norm": 3.45926603241094, - "learning_rate": 7.709912948336583e-06, - "loss": 0.7132, + "epoch": 0.23, + "grad_norm": 1.645209238416417, + "learning_rate": 9.006100028239418e-06, + "loss": 0.5483, "step": 3214 }, { - "epoch": 0.34, - "grad_norm": 3.182829948766781, - "learning_rate": 7.70848056671498e-06, - "loss": 0.6255, + "epoch": 0.23, + "grad_norm": 1.8269677384049103, + "learning_rate": 9.005412295547927e-06, + "loss": 0.6445, "step": 3215 }, { - "epoch": 0.34, - "grad_norm": 2.7748630647111376, - "learning_rate": 7.707047870429931e-06, - "loss": 0.6655, + "epoch": 0.23, + "grad_norm": 1.7625315294528323, + "learning_rate": 9.004724351276989e-06, + "loss": 0.5177, "step": 3216 }, { - "epoch": 0.34, - "grad_norm": 3.9189572306348666, - "learning_rate": 7.705614859647888e-06, - "loss": 0.6888, + "epoch": 0.23, + "grad_norm": 0.9442415497824801, + "learning_rate": 9.004036195462938e-06, + "loss": 0.496, "step": 3217 }, { - "epoch": 0.34, - "grad_norm": 1.1326017663302763, - "learning_rate": 7.704181534535332e-06, - "loss": 0.6407, + "epoch": 0.23, + "grad_norm": 1.573267773611724, + "learning_rate": 9.003347828142126e-06, + "loss": 0.4935, "step": 3218 }, { - "epoch": 0.34, - "grad_norm": 4.352263696228539, - "learning_rate": 7.70274789525878e-06, - "loss": 0.6605, + "epoch": 0.23, + "grad_norm": 1.6265894398535938, + "learning_rate": 9.00265924935092e-06, + "loss": 0.6053, "step": 3219 }, { - "epoch": 0.34, - "grad_norm": 8.035499997279176, - "learning_rate": 7.701313941984791e-06, - "loss": 0.6789, + "epoch": 0.23, + "grad_norm": 2.082022885554193, + "learning_rate": 9.001970459125689e-06, + "loss": 0.593, "step": 3220 }, { - "epoch": 0.34, - "grad_norm": 2.629717191952034, - "learning_rate": 7.699879674879958e-06, - "loss": 0.5778, + "epoch": 0.23, + "grad_norm": 1.7503239234522106, + "learning_rate": 9.001281457502818e-06, + "loss": 0.6292, "step": 3221 }, { - "epoch": 0.34, - "grad_norm": 6.130587806018568, - "learning_rate": 7.698445094110909e-06, - "loss": 0.7025, + "epoch": 0.23, + "grad_norm": 0.808309658184656, + "learning_rate": 9.000592244518701e-06, + "loss": 0.4479, "step": 3222 }, { - "epoch": 0.34, - "grad_norm": 2.8760063671669753, - "learning_rate": 7.697010199844308e-06, - "loss": 0.7081, + "epoch": 0.23, + "grad_norm": 1.6458250955201286, + "learning_rate": 8.999902820209747e-06, + "loss": 0.5052, "step": 3223 }, { - "epoch": 0.34, - "grad_norm": 2.7331704381503226, - "learning_rate": 7.69557499224686e-06, - "loss": 0.7452, + "epoch": 0.23, + "grad_norm": 1.9860024271040086, + "learning_rate": 8.999213184612371e-06, + "loss": 0.5959, "step": 3224 }, { - "epoch": 0.34, - "grad_norm": 2.850116734761613, - "learning_rate": 7.694139471485301e-06, - "loss": 0.6846, + "epoch": 0.23, + "grad_norm": 1.482142967295738, + "learning_rate": 8.998523337763005e-06, + "loss": 0.4915, "step": 3225 }, { - "epoch": 0.34, - "grad_norm": 3.489662975427095, - "learning_rate": 7.692703637726407e-06, - "loss": 0.7061, + "epoch": 0.23, + "grad_norm": 0.8492377750495772, + "learning_rate": 8.997833279698088e-06, + "loss": 0.4359, "step": 3226 }, { - "epoch": 0.34, - "grad_norm": 2.903189368179508, - "learning_rate": 7.691267491136986e-06, - "loss": 0.5947, + "epoch": 0.23, + "grad_norm": 1.9519200731969515, + "learning_rate": 8.997143010454069e-06, + "loss": 0.5845, "step": 3227 }, { - "epoch": 0.34, - "grad_norm": 4.316176181783629, - "learning_rate": 7.689831031883887e-06, - "loss": 0.6344, + "epoch": 0.23, + "grad_norm": 1.7414482384857435, + "learning_rate": 8.996452530067413e-06, + "loss": 0.5914, "step": 3228 }, { - "epoch": 0.34, - "grad_norm": 2.4581575403347564, - "learning_rate": 7.688394260133997e-06, - "loss": 0.6042, + "epoch": 0.23, + "grad_norm": 1.8617919274310695, + "learning_rate": 8.995761838574591e-06, + "loss": 0.5384, "step": 3229 }, { - "epoch": 0.34, - "grad_norm": 3.424492880509628, - "learning_rate": 7.686957176054231e-06, - "loss": 0.6886, + "epoch": 0.23, + "grad_norm": 1.6887187479243637, + "learning_rate": 8.99507093601209e-06, + "loss": 0.5944, "step": 3230 }, { - "epoch": 0.34, - "grad_norm": 2.4711470411348553, - "learning_rate": 7.68551977981155e-06, - "loss": 0.5719, + "epoch": 0.23, + "grad_norm": 1.8697429769591354, + "learning_rate": 8.994379822416405e-06, + "loss": 0.6304, "step": 3231 }, { - "epoch": 0.34, - "grad_norm": 2.4487165637771358, - "learning_rate": 7.684082071572943e-06, - "loss": 0.7407, + "epoch": 0.23, + "grad_norm": 1.8737299216826195, + "learning_rate": 8.993688497824044e-06, + "loss": 0.5256, "step": 3232 }, { - "epoch": 0.34, - "grad_norm": 2.8173730592149235, - "learning_rate": 7.68264405150544e-06, - "loss": 0.6361, + "epoch": 0.23, + "grad_norm": 1.5871851110323085, + "learning_rate": 8.992996962271523e-06, + "loss": 0.5815, "step": 3233 }, { - "epoch": 0.34, - "grad_norm": 3.721164285756093, - "learning_rate": 7.681205719776104e-06, - "loss": 0.7631, + "epoch": 0.23, + "grad_norm": 0.8170394597898282, + "learning_rate": 8.992305215795373e-06, + "loss": 0.4574, "step": 3234 }, { - "epoch": 0.34, - "grad_norm": 3.9251106459657077, - "learning_rate": 7.679767076552038e-06, - "loss": 0.6352, + "epoch": 0.23, + "grad_norm": 1.6425415738908165, + "learning_rate": 8.991613258432132e-06, + "loss": 0.4784, "step": 3235 }, { - "epoch": 0.34, - "grad_norm": 1.0737715770377105, - "learning_rate": 7.678328122000382e-06, - "loss": 0.6233, + "epoch": 0.23, + "grad_norm": 1.5787527376473423, + "learning_rate": 8.990921090218355e-06, + "loss": 0.555, "step": 3236 }, { - "epoch": 0.34, - "grad_norm": 2.5541768150933235, - "learning_rate": 7.676888856288307e-06, - "loss": 0.6348, + "epoch": 0.23, + "grad_norm": 1.9433371348693589, + "learning_rate": 8.990228711190603e-06, + "loss": 0.4688, "step": 3237 }, { - "epoch": 0.34, - "grad_norm": 2.580142219433405, - "learning_rate": 7.67544927958302e-06, - "loss": 0.6439, + "epoch": 0.23, + "grad_norm": 1.723896395017194, + "learning_rate": 8.98953612138545e-06, + "loss": 0.5957, "step": 3238 }, { - "epoch": 0.34, - "grad_norm": 2.654991640432859, - "learning_rate": 7.67400939205177e-06, - "loss": 0.6801, + "epoch": 0.23, + "grad_norm": 1.617093047604041, + "learning_rate": 8.98884332083948e-06, + "loss": 0.5168, "step": 3239 }, { - "epoch": 0.34, - "grad_norm": 3.6174398276239623, - "learning_rate": 7.67256919386184e-06, - "loss": 0.7203, + "epoch": 0.23, + "grad_norm": 1.5806182537705642, + "learning_rate": 8.98815030958929e-06, + "loss": 0.6095, "step": 3240 }, { - "epoch": 0.34, - "grad_norm": 3.5610803553484955, - "learning_rate": 7.671128685180547e-06, - "loss": 0.6511, + "epoch": 0.23, + "grad_norm": 0.8058443448221734, + "learning_rate": 8.987457087671485e-06, + "loss": 0.4419, "step": 3241 }, { - "epoch": 0.34, - "grad_norm": 2.938858056226461, - "learning_rate": 7.669687866175245e-06, - "loss": 0.6039, + "epoch": 0.23, + "grad_norm": 1.7281444572734632, + "learning_rate": 8.986763655122689e-06, + "loss": 0.4612, "step": 3242 }, { - "epoch": 0.34, - "grad_norm": 2.8043292999636313, - "learning_rate": 7.668246737013323e-06, - "loss": 0.6662, + "epoch": 0.23, + "grad_norm": 1.7229801781965843, + "learning_rate": 8.986070011979524e-06, + "loss": 0.5727, "step": 3243 }, { - "epoch": 0.34, - "grad_norm": 2.3949517216338907, - "learning_rate": 7.666805297862208e-06, - "loss": 0.6496, + "epoch": 0.23, + "grad_norm": 2.152121734131126, + "learning_rate": 8.985376158278636e-06, + "loss": 0.5907, "step": 3244 }, { - "epoch": 0.34, - "grad_norm": 3.1462781898491157, - "learning_rate": 7.665363548889362e-06, - "loss": 0.6178, + "epoch": 0.23, + "grad_norm": 1.7848930296593741, + "learning_rate": 8.984682094056676e-06, + "loss": 0.5802, "step": 3245 }, { - "epoch": 0.34, - "grad_norm": 2.5506311534896433, - "learning_rate": 7.663921490262286e-06, - "loss": 0.6798, + "epoch": 0.23, + "grad_norm": 3.2085837570433986, + "learning_rate": 8.983987819350303e-06, + "loss": 0.6336, "step": 3246 }, { - "epoch": 0.34, - "grad_norm": 9.48469185368361, - "learning_rate": 7.66247912214851e-06, - "loss": 0.623, + "epoch": 0.23, + "grad_norm": 1.823813665256233, + "learning_rate": 8.983293334196197e-06, + "loss": 0.5401, "step": 3247 }, { - "epoch": 0.34, - "grad_norm": 2.841001179637787, - "learning_rate": 7.661036444715608e-06, - "loss": 0.6621, + "epoch": 0.23, + "grad_norm": 1.5727330068275822, + "learning_rate": 8.982598638631038e-06, + "loss": 0.5829, "step": 3248 }, { - "epoch": 0.34, - "grad_norm": 1.1276649407769432, - "learning_rate": 7.659593458131181e-06, - "loss": 0.6079, + "epoch": 0.23, + "grad_norm": 1.910220020160539, + "learning_rate": 8.981903732691525e-06, + "loss": 0.6025, "step": 3249 }, { - "epoch": 0.34, - "grad_norm": 2.967235234356677, - "learning_rate": 7.658150162562875e-06, - "loss": 0.6655, + "epoch": 0.23, + "grad_norm": 2.0193488620608098, + "learning_rate": 8.981208616414363e-06, + "loss": 0.5942, "step": 3250 }, { - "epoch": 0.34, - "grad_norm": 2.935135934795403, - "learning_rate": 7.656706558178368e-06, - "loss": 0.6983, + "epoch": 0.23, + "grad_norm": 1.7100276560604157, + "learning_rate": 8.980513289836272e-06, + "loss": 0.5197, "step": 3251 }, { - "epoch": 0.34, - "grad_norm": 2.7507617496596493, - "learning_rate": 7.655262645145374e-06, - "loss": 0.6997, + "epoch": 0.23, + "grad_norm": 1.3657679583905071, + "learning_rate": 8.979817752993982e-06, + "loss": 0.4892, "step": 3252 }, { - "epoch": 0.34, - "grad_norm": 3.6113941756706422, - "learning_rate": 7.65381842363164e-06, - "loss": 0.6512, + "epoch": 0.23, + "grad_norm": 2.4157532542283082, + "learning_rate": 8.979122005924232e-06, + "loss": 0.6404, "step": 3253 }, { - "epoch": 0.34, - "grad_norm": 2.9939074711670517, - "learning_rate": 7.652373893804952e-06, - "loss": 0.6817, + "epoch": 0.23, + "grad_norm": 1.5478093824588186, + "learning_rate": 8.978426048663776e-06, + "loss": 0.6122, "step": 3254 }, { - "epoch": 0.34, - "grad_norm": 3.5665404033863157, - "learning_rate": 7.650929055833135e-06, - "loss": 0.6852, + "epoch": 0.23, + "grad_norm": 1.7843355529890397, + "learning_rate": 8.977729881249375e-06, + "loss": 0.6285, "step": 3255 }, { - "epoch": 0.34, - "grad_norm": 2.7571299007941965, - "learning_rate": 7.64948390988404e-06, - "loss": 0.6688, + "epoch": 0.23, + "grad_norm": 1.8957762452006313, + "learning_rate": 8.977033503717803e-06, + "loss": 0.5759, "step": 3256 }, { - "epoch": 0.34, - "grad_norm": 3.93253267201708, - "learning_rate": 7.648038456125566e-06, - "loss": 0.6498, + "epoch": 0.23, + "grad_norm": 2.3846621747530152, + "learning_rate": 8.976336916105844e-06, + "loss": 0.5404, "step": 3257 }, { - "epoch": 0.34, - "grad_norm": 1.2564745859826627, - "learning_rate": 7.646592694725638e-06, - "loss": 0.6059, + "epoch": 0.23, + "grad_norm": 2.122295679397442, + "learning_rate": 8.975640118450297e-06, + "loss": 0.5601, "step": 3258 }, { - "epoch": 0.34, - "grad_norm": 2.460398219897918, - "learning_rate": 7.64514662585222e-06, - "loss": 0.7199, + "epoch": 0.23, + "grad_norm": 1.8064780726434821, + "learning_rate": 8.974943110787968e-06, + "loss": 0.614, "step": 3259 }, { - "epoch": 0.34, - "grad_norm": 2.095756389781973, - "learning_rate": 7.643700249673315e-06, - "loss": 0.6769, + "epoch": 0.23, + "grad_norm": 0.7978490931661911, + "learning_rate": 8.974245893155673e-06, + "loss": 0.4461, "step": 3260 }, { - "epoch": 0.34, - "grad_norm": 2.6122927168443058, - "learning_rate": 7.642253566356957e-06, - "loss": 0.6627, + "epoch": 0.23, + "grad_norm": 1.653306588106065, + "learning_rate": 8.973548465590244e-06, + "loss": 0.5917, "step": 3261 }, { - "epoch": 0.34, - "grad_norm": 4.825071935901371, - "learning_rate": 7.640806576071215e-06, - "loss": 0.6343, + "epoch": 0.23, + "grad_norm": 1.9903412843688097, + "learning_rate": 8.97285082812852e-06, + "loss": 0.6436, "step": 3262 }, { - "epoch": 0.34, - "grad_norm": 2.968048064848337, - "learning_rate": 7.639359278984202e-06, - "loss": 0.7232, + "epoch": 0.23, + "grad_norm": 2.6452481561831447, + "learning_rate": 8.972152980807357e-06, + "loss": 0.5578, "step": 3263 }, { - "epoch": 0.34, - "grad_norm": 2.636260757493031, - "learning_rate": 7.637911675264056e-06, - "loss": 0.6242, + "epoch": 0.23, + "grad_norm": 1.8024570153417603, + "learning_rate": 8.971454923663611e-06, + "loss": 0.627, "step": 3264 }, { - "epoch": 0.34, - "grad_norm": 6.452731556132531, - "learning_rate": 7.636463765078958e-06, - "loss": 0.6242, + "epoch": 0.23, + "grad_norm": 1.7620173399466177, + "learning_rate": 8.97075665673416e-06, + "loss": 0.5361, "step": 3265 }, { - "epoch": 0.34, - "grad_norm": 1.0833107459100242, - "learning_rate": 7.63501554859712e-06, - "loss": 0.5713, + "epoch": 0.23, + "grad_norm": 2.3496644424368154, + "learning_rate": 8.970058180055887e-06, + "loss": 0.548, "step": 3266 }, { - "epoch": 0.34, - "grad_norm": 3.2859379838193097, - "learning_rate": 7.633567025986795e-06, - "loss": 0.7321, + "epoch": 0.23, + "grad_norm": 2.44453808603195, + "learning_rate": 8.969359493665688e-06, + "loss": 0.5887, "step": 3267 }, { - "epoch": 0.34, - "grad_norm": 2.9027700596440162, - "learning_rate": 7.632118197416263e-06, - "loss": 0.574, + "epoch": 0.23, + "grad_norm": 0.8088939148600929, + "learning_rate": 8.968660597600472e-06, + "loss": 0.4512, "step": 3268 }, { - "epoch": 0.34, - "grad_norm": 2.7949610995170224, - "learning_rate": 7.630669063053849e-06, - "loss": 0.6283, + "epoch": 0.23, + "grad_norm": 1.7675821167537464, + "learning_rate": 8.967961491897155e-06, + "loss": 0.5384, "step": 3269 }, { - "epoch": 0.34, - "grad_norm": 2.724700144475404, - "learning_rate": 7.629219623067907e-06, - "loss": 0.6921, + "epoch": 0.23, + "grad_norm": 1.987730266513659, + "learning_rate": 8.967262176592665e-06, + "loss": 0.6607, "step": 3270 }, { - "epoch": 0.34, - "grad_norm": 3.384530330225657, - "learning_rate": 7.62776987762683e-06, - "loss": 0.6618, + "epoch": 0.23, + "grad_norm": 2.820200398762011, + "learning_rate": 8.966562651723947e-06, + "loss": 0.5531, "step": 3271 }, { - "epoch": 0.34, - "grad_norm": 2.591079385525758, - "learning_rate": 7.626319826899045e-06, - "loss": 0.6482, + "epoch": 0.23, + "grad_norm": 1.730497142660675, + "learning_rate": 8.965862917327947e-06, + "loss": 0.6658, "step": 3272 }, { - "epoch": 0.34, - "grad_norm": 3.314517492095959, - "learning_rate": 7.624869471053014e-06, - "loss": 0.6968, + "epoch": 0.23, + "grad_norm": 1.6933763895642082, + "learning_rate": 8.96516297344163e-06, + "loss": 0.5852, "step": 3273 }, { - "epoch": 0.34, - "grad_norm": 2.71453402590448, - "learning_rate": 7.623418810257234e-06, - "loss": 0.6831, + "epoch": 0.23, + "grad_norm": 1.603094362094811, + "learning_rate": 8.96446282010197e-06, + "loss": 0.5811, "step": 3274 }, { - "epoch": 0.34, - "grad_norm": 3.710436771293437, - "learning_rate": 7.621967844680241e-06, - "loss": 0.6333, + "epoch": 0.23, + "grad_norm": 1.6547792106296562, + "learning_rate": 8.963762457345948e-06, + "loss": 0.5711, "step": 3275 }, { - "epoch": 0.34, - "grad_norm": 3.8259084062940265, - "learning_rate": 7.620516574490604e-06, - "loss": 0.6768, + "epoch": 0.23, + "grad_norm": 1.7972955789274427, + "learning_rate": 8.963061885210563e-06, + "loss": 0.5815, "step": 3276 }, { - "epoch": 0.34, - "grad_norm": 2.291949490786709, - "learning_rate": 7.6190649998569265e-06, - "loss": 0.6547, + "epoch": 0.23, + "grad_norm": 1.843281373499341, + "learning_rate": 8.962361103732822e-06, + "loss": 0.5329, "step": 3277 }, { - "epoch": 0.34, - "grad_norm": 2.7125389974746223, - "learning_rate": 7.617613120947848e-06, - "loss": 0.5768, + "epoch": 0.23, + "grad_norm": 1.8133405167481242, + "learning_rate": 8.96166011294974e-06, + "loss": 0.5404, "step": 3278 }, { - "epoch": 0.35, - "grad_norm": 9.167970886019283, - "learning_rate": 7.616160937932045e-06, - "loss": 0.7309, + "epoch": 0.23, + "grad_norm": 1.8177104335973644, + "learning_rate": 8.960958912898347e-06, + "loss": 0.6811, "step": 3279 }, { - "epoch": 0.35, - "grad_norm": 4.549733398070985, - "learning_rate": 7.614708450978226e-06, - "loss": 0.7135, + "epoch": 0.23, + "grad_norm": 1.6230752306347012, + "learning_rate": 8.960257503615682e-06, + "loss": 0.5814, "step": 3280 }, { - "epoch": 0.35, - "grad_norm": 2.9374280731445555, - "learning_rate": 7.613255660255137e-06, - "loss": 0.7211, + "epoch": 0.23, + "grad_norm": 1.5580439386903668, + "learning_rate": 8.959555885138798e-06, + "loss": 0.5811, "step": 3281 }, { - "epoch": 0.35, - "grad_norm": 2.870278169530705, - "learning_rate": 7.611802565931559e-06, - "loss": 0.6897, + "epoch": 0.23, + "grad_norm": 2.7026019795984744, + "learning_rate": 8.958854057504754e-06, + "loss": 0.5171, "step": 3282 }, { - "epoch": 0.35, - "grad_norm": 1.2543736203150375, - "learning_rate": 7.610349168176309e-06, - "loss": 0.6159, + "epoch": 0.23, + "grad_norm": 1.5972451889926471, + "learning_rate": 8.958152020750624e-06, + "loss": 0.5405, "step": 3283 }, { - "epoch": 0.35, - "grad_norm": 2.738519475571267, - "learning_rate": 7.608895467158241e-06, - "loss": 0.7084, + "epoch": 0.23, + "grad_norm": 1.8961367685518233, + "learning_rate": 8.957449774913493e-06, + "loss": 0.6329, "step": 3284 }, { - "epoch": 0.35, - "grad_norm": 4.479720977262225, - "learning_rate": 7.607441463046236e-06, - "loss": 0.7224, + "epoch": 0.23, + "grad_norm": 2.126515352611859, + "learning_rate": 8.956747320030457e-06, + "loss": 0.5431, "step": 3285 }, { - "epoch": 0.35, - "grad_norm": 3.4942767891910593, - "learning_rate": 7.60598715600922e-06, - "loss": 0.6773, + "epoch": 0.23, + "grad_norm": 2.2571681359566056, + "learning_rate": 8.956044656138617e-06, + "loss": 0.6407, "step": 3286 }, { - "epoch": 0.35, - "grad_norm": 4.350906281284041, - "learning_rate": 7.60453254621615e-06, - "loss": 0.6071, + "epoch": 0.23, + "grad_norm": 1.6908411525009268, + "learning_rate": 8.955341783275096e-06, + "loss": 0.5712, "step": 3287 }, { - "epoch": 0.35, - "grad_norm": 3.8683834683641827, - "learning_rate": 7.603077633836018e-06, - "loss": 0.6792, + "epoch": 0.23, + "grad_norm": 1.6308039919470703, + "learning_rate": 8.954638701477018e-06, + "loss": 0.5519, "step": 3288 }, { - "epoch": 0.35, - "grad_norm": 2.7330042305762703, - "learning_rate": 7.601622419037851e-06, - "loss": 0.5959, + "epoch": 0.23, + "grad_norm": 1.7898155686375137, + "learning_rate": 8.953935410781523e-06, + "loss": 0.5571, "step": 3289 }, { - "epoch": 0.35, - "grad_norm": 3.4753065005990313, - "learning_rate": 7.600166901990711e-06, - "loss": 0.6422, + "epoch": 0.23, + "grad_norm": 1.7908501788722158, + "learning_rate": 8.953231911225763e-06, + "loss": 0.5231, "step": 3290 }, { - "epoch": 0.35, - "grad_norm": 2.6481317364356585, - "learning_rate": 7.5987110828636966e-06, - "loss": 0.7076, + "epoch": 0.23, + "grad_norm": 1.6947720245475335, + "learning_rate": 8.952528202846897e-06, + "loss": 0.5707, "step": 3291 }, { - "epoch": 0.35, - "grad_norm": 4.978965610289502, - "learning_rate": 7.5972549618259415e-06, - "loss": 0.6281, + "epoch": 0.23, + "grad_norm": 2.950233266605976, + "learning_rate": 8.9518242856821e-06, + "loss": 0.6193, "step": 3292 }, { - "epoch": 0.35, - "grad_norm": 3.1836343422551043, - "learning_rate": 7.595798539046612e-06, - "loss": 0.6259, + "epoch": 0.23, + "grad_norm": 0.8330861751099594, + "learning_rate": 8.951120159768553e-06, + "loss": 0.4408, "step": 3293 }, { - "epoch": 0.35, - "grad_norm": 2.6636282179606776, - "learning_rate": 7.594341814694914e-06, - "loss": 0.5633, + "epoch": 0.23, + "grad_norm": 1.6140889342677536, + "learning_rate": 8.95041582514345e-06, + "loss": 0.578, "step": 3294 }, { - "epoch": 0.35, - "grad_norm": 2.6492054525812354, - "learning_rate": 7.592884788940082e-06, - "loss": 0.7039, + "epoch": 0.23, + "grad_norm": 1.7151110918857462, + "learning_rate": 8.949711281843998e-06, + "loss": 0.5773, "step": 3295 }, { - "epoch": 0.35, - "grad_norm": 2.6199101782391185, - "learning_rate": 7.59142746195139e-06, - "loss": 0.6398, + "epoch": 0.23, + "grad_norm": 1.5358286755630441, + "learning_rate": 8.949006529907413e-06, + "loss": 0.5822, "step": 3296 }, { - "epoch": 0.35, - "grad_norm": 2.4865546985556777, - "learning_rate": 7.5899698338981475e-06, - "loss": 0.625, + "epoch": 0.23, + "grad_norm": 1.6785762716337438, + "learning_rate": 8.94830156937092e-06, + "loss": 0.5059, "step": 3297 }, { - "epoch": 0.35, - "grad_norm": 2.3823056427702305, - "learning_rate": 7.588511904949696e-06, - "loss": 0.7082, + "epoch": 0.23, + "grad_norm": 1.7224452668726724, + "learning_rate": 8.947596400271763e-06, + "loss": 0.5991, "step": 3298 }, { - "epoch": 0.35, - "grad_norm": 2.653522038656345, - "learning_rate": 7.587053675275413e-06, - "loss": 0.7159, + "epoch": 0.23, + "grad_norm": 2.028695672369617, + "learning_rate": 8.946891022647185e-06, + "loss": 0.5832, "step": 3299 }, { - "epoch": 0.35, - "grad_norm": 3.007493250962484, - "learning_rate": 7.585595145044714e-06, - "loss": 0.6847, + "epoch": 0.23, + "grad_norm": 1.7498686806762282, + "learning_rate": 8.946185436534452e-06, + "loss": 0.6337, "step": 3300 }, { - "epoch": 0.35, - "grad_norm": 3.9762142271332412, - "learning_rate": 7.5841363144270445e-06, - "loss": 0.6299, + "epoch": 0.23, + "grad_norm": 2.5404427633307125, + "learning_rate": 8.94547964197083e-06, + "loss": 0.5747, "step": 3301 }, { - "epoch": 0.35, - "grad_norm": 4.081453490480924, - "learning_rate": 7.582677183591889e-06, - "loss": 0.6112, + "epoch": 0.23, + "grad_norm": 1.8592883144946082, + "learning_rate": 8.944773638993604e-06, + "loss": 0.5421, "step": 3302 }, { - "epoch": 0.35, - "grad_norm": 3.0011529464421827, - "learning_rate": 7.581217752708763e-06, - "loss": 0.6528, + "epoch": 0.23, + "grad_norm": 0.768196745042951, + "learning_rate": 8.94406742764007e-06, + "loss": 0.4802, "step": 3303 }, { - "epoch": 0.35, - "grad_norm": 3.8502588551289163, - "learning_rate": 7.579758021947221e-06, - "loss": 0.7399, + "epoch": 0.23, + "grad_norm": 1.8652791121099208, + "learning_rate": 8.943361007947529e-06, + "loss": 0.6477, "step": 3304 }, { - "epoch": 0.35, - "grad_norm": 3.6979556515844454, - "learning_rate": 7.578297991476848e-06, - "loss": 0.7052, + "epoch": 0.23, + "grad_norm": 1.697992567049026, + "learning_rate": 8.942654379953297e-06, + "loss": 0.5566, "step": 3305 }, { - "epoch": 0.35, - "grad_norm": 2.409793154362905, - "learning_rate": 7.576837661467269e-06, - "loss": 0.5654, + "epoch": 0.23, + "grad_norm": 4.174228271284152, + "learning_rate": 8.941947543694703e-06, + "loss": 0.5926, "step": 3306 }, { - "epoch": 0.35, - "grad_norm": 3.0297576136517264, - "learning_rate": 7.575377032088138e-06, - "loss": 0.5874, + "epoch": 0.23, + "grad_norm": 1.8688475633301662, + "learning_rate": 8.94124049920908e-06, + "loss": 0.6027, "step": 3307 }, { - "epoch": 0.35, - "grad_norm": 2.335566510834448, - "learning_rate": 7.573916103509149e-06, - "loss": 0.6567, + "epoch": 0.23, + "grad_norm": 0.808972929261961, + "learning_rate": 8.940533246533781e-06, + "loss": 0.4498, "step": 3308 }, { - "epoch": 0.35, - "grad_norm": 2.8306846989581675, - "learning_rate": 7.572454875900026e-06, - "loss": 0.6771, + "epoch": 0.23, + "grad_norm": 1.8525659670635315, + "learning_rate": 8.939825785706163e-06, + "loss": 0.5317, "step": 3309 }, { - "epoch": 0.35, - "grad_norm": 2.914166159192387, - "learning_rate": 7.570993349430533e-06, - "loss": 0.6979, + "epoch": 0.23, + "grad_norm": 1.5646139743883554, + "learning_rate": 8.939118116763597e-06, + "loss": 0.6509, "step": 3310 }, { - "epoch": 0.35, - "grad_norm": 3.172792917556958, - "learning_rate": 7.569531524270465e-06, - "loss": 0.6625, + "epoch": 0.23, + "grad_norm": 2.1153692934694477, + "learning_rate": 8.938410239743465e-06, + "loss": 0.5817, "step": 3311 }, { - "epoch": 0.35, - "grad_norm": 5.322440067377819, - "learning_rate": 7.568069400589651e-06, - "loss": 0.6143, + "epoch": 0.24, + "grad_norm": 1.5861706958437227, + "learning_rate": 8.937702154683159e-06, + "loss": 0.5315, "step": 3312 }, { - "epoch": 0.35, - "grad_norm": 2.7366214062810466, - "learning_rate": 7.566606978557959e-06, - "loss": 0.6719, + "epoch": 0.24, + "grad_norm": 1.6783010290126583, + "learning_rate": 8.936993861620081e-06, + "loss": 0.5762, "step": 3313 }, { - "epoch": 0.35, - "grad_norm": 3.6661060876658205, - "learning_rate": 7.565144258345287e-06, - "loss": 0.7597, + "epoch": 0.24, + "grad_norm": 1.4207336485531172, + "learning_rate": 8.936285360591648e-06, + "loss": 0.5195, "step": 3314 }, { - "epoch": 0.35, - "grad_norm": 2.841666028729839, - "learning_rate": 7.563681240121569e-06, - "loss": 0.6452, + "epoch": 0.24, + "grad_norm": 1.4901147640304615, + "learning_rate": 8.935576651635285e-06, + "loss": 0.5021, "step": 3315 }, { - "epoch": 0.35, - "grad_norm": 2.9148308706374637, - "learning_rate": 7.562217924056777e-06, - "loss": 0.7037, + "epoch": 0.24, + "grad_norm": 1.8584633026477153, + "learning_rate": 8.934867734788427e-06, + "loss": 0.5761, "step": 3316 }, { - "epoch": 0.35, - "grad_norm": 2.493732792998696, - "learning_rate": 7.560754310320912e-06, - "loss": 0.665, + "epoch": 0.24, + "grad_norm": 3.4855327972593932, + "learning_rate": 8.934158610088521e-06, + "loss": 0.5331, "step": 3317 }, { - "epoch": 0.35, - "grad_norm": 2.3318417413774633, - "learning_rate": 7.559290399084016e-06, - "loss": 0.6403, + "epoch": 0.24, + "grad_norm": 1.6355755804752488, + "learning_rate": 8.933449277573028e-06, + "loss": 0.619, "step": 3318 }, { - "epoch": 0.35, - "grad_norm": 2.400731362221395, - "learning_rate": 7.5578261905161575e-06, - "loss": 0.6265, + "epoch": 0.24, + "grad_norm": 2.031665369824033, + "learning_rate": 8.932739737279414e-06, + "loss": 0.5384, "step": 3319 }, { - "epoch": 0.35, - "grad_norm": 4.040790993560575, - "learning_rate": 7.556361684787446e-06, - "loss": 0.6665, + "epoch": 0.24, + "grad_norm": 2.0872524464633355, + "learning_rate": 8.932029989245164e-06, + "loss": 0.5349, "step": 3320 }, { - "epoch": 0.35, - "grad_norm": 2.2499211076771033, - "learning_rate": 7.554896882068025e-06, - "loss": 0.594, + "epoch": 0.24, + "grad_norm": 1.658323116315646, + "learning_rate": 8.931320033507765e-06, + "loss": 0.4947, "step": 3321 }, { - "epoch": 0.35, - "grad_norm": 2.357387837819163, - "learning_rate": 7.5534317825280664e-06, - "loss": 0.6814, + "epoch": 0.24, + "grad_norm": 0.8787613605800726, + "learning_rate": 8.93060987010472e-06, + "loss": 0.4497, "step": 3322 }, { - "epoch": 0.35, - "grad_norm": 6.712351178001371, - "learning_rate": 7.551966386337788e-06, - "loss": 0.7282, + "epoch": 0.24, + "grad_norm": 1.7105677114175144, + "learning_rate": 8.929899499073542e-06, + "loss": 0.5302, "step": 3323 }, { - "epoch": 0.35, - "grad_norm": 2.4161878209245478, - "learning_rate": 7.5505006936674304e-06, - "loss": 0.6134, + "epoch": 0.24, + "grad_norm": 1.6672822051694582, + "learning_rate": 8.929188920451759e-06, + "loss": 0.4877, "step": 3324 }, { - "epoch": 0.35, - "grad_norm": 2.640259293020909, - "learning_rate": 7.5490347046872755e-06, - "loss": 0.7243, + "epoch": 0.24, + "grad_norm": 1.7299424805297468, + "learning_rate": 8.928478134276902e-06, + "loss": 0.5733, "step": 3325 }, { - "epoch": 0.35, - "grad_norm": 3.240206873144137, - "learning_rate": 7.547568419567637e-06, - "loss": 0.6741, + "epoch": 0.24, + "grad_norm": 1.756923900437499, + "learning_rate": 8.927767140586518e-06, + "loss": 0.5907, "step": 3326 }, { - "epoch": 0.35, - "grad_norm": 3.394746942635679, - "learning_rate": 7.546101838478864e-06, - "loss": 0.6578, + "epoch": 0.24, + "grad_norm": 1.6963719683103526, + "learning_rate": 8.927055939418165e-06, + "loss": 0.5676, "step": 3327 }, { - "epoch": 0.35, - "grad_norm": 3.144423846324425, - "learning_rate": 7.54463496159134e-06, - "loss": 0.6511, + "epoch": 0.24, + "grad_norm": 2.4182498960978727, + "learning_rate": 8.92634453080941e-06, + "loss": 0.5551, "step": 3328 }, { - "epoch": 0.35, - "grad_norm": 2.6453748407725106, - "learning_rate": 7.543167789075481e-06, - "loss": 0.6275, + "epoch": 0.24, + "grad_norm": 1.6592001965495446, + "learning_rate": 8.925632914797833e-06, + "loss": 0.5682, "step": 3329 }, { - "epoch": 0.35, - "grad_norm": 2.3080573199067187, - "learning_rate": 7.54170032110174e-06, - "loss": 0.6676, + "epoch": 0.24, + "grad_norm": 1.753568725380367, + "learning_rate": 8.924921091421024e-06, + "loss": 0.5487, "step": 3330 }, { - "epoch": 0.35, - "grad_norm": 2.714660296548131, - "learning_rate": 7.540232557840604e-06, - "loss": 0.611, + "epoch": 0.24, + "grad_norm": 1.956388164722983, + "learning_rate": 8.924209060716583e-06, + "loss": 0.6135, "step": 3331 }, { - "epoch": 0.35, - "grad_norm": 4.047279583079742, - "learning_rate": 7.53876449946259e-06, - "loss": 0.6867, + "epoch": 0.24, + "grad_norm": 2.099965269566592, + "learning_rate": 8.923496822722122e-06, + "loss": 0.588, "step": 3332 }, { - "epoch": 0.35, - "grad_norm": 3.8795995067294924, - "learning_rate": 7.537296146138255e-06, - "loss": 0.5692, + "epoch": 0.24, + "grad_norm": 1.4940765816120898, + "learning_rate": 8.922784377475266e-06, + "loss": 0.5625, "step": 3333 }, { - "epoch": 0.35, - "grad_norm": 2.8734422567926003, - "learning_rate": 7.535827498038192e-06, - "loss": 0.7021, + "epoch": 0.24, + "grad_norm": 1.5454030610067244, + "learning_rate": 8.922071725013646e-06, + "loss": 0.5806, "step": 3334 }, { - "epoch": 0.35, - "grad_norm": 5.854617594622321, - "learning_rate": 7.534358555333018e-06, - "loss": 0.6198, + "epoch": 0.24, + "grad_norm": 1.653357446448182, + "learning_rate": 8.921358865374908e-06, + "loss": 0.5975, "step": 3335 }, { - "epoch": 0.35, - "grad_norm": 5.60018430557542, - "learning_rate": 7.532889318193393e-06, - "loss": 0.6089, + "epoch": 0.24, + "grad_norm": 3.3097044656074774, + "learning_rate": 8.920645798596705e-06, + "loss": 0.5498, "step": 3336 }, { - "epoch": 0.35, - "grad_norm": 6.2343822540898515, - "learning_rate": 7.531419786790011e-06, - "loss": 0.7023, + "epoch": 0.24, + "grad_norm": 2.081765124086054, + "learning_rate": 8.919932524716707e-06, + "loss": 0.5942, "step": 3337 }, { - "epoch": 0.35, - "grad_norm": 3.095566888586303, - "learning_rate": 7.5299499612935934e-06, - "loss": 0.6577, + "epoch": 0.24, + "grad_norm": 1.5045136450084275, + "learning_rate": 8.919219043772592e-06, + "loss": 0.5252, "step": 3338 }, { - "epoch": 0.35, - "grad_norm": 3.136010955006197, - "learning_rate": 7.528479841874904e-06, - "loss": 0.6715, + "epoch": 0.24, + "grad_norm": 1.7119010101119012, + "learning_rate": 8.918505355802046e-06, + "loss": 0.5382, "step": 3339 }, { - "epoch": 0.35, - "grad_norm": 2.077841229423998, - "learning_rate": 7.527009428704735e-06, - "loss": 0.683, + "epoch": 0.24, + "grad_norm": 1.6773596881699404, + "learning_rate": 8.917791460842771e-06, + "loss": 0.5272, "step": 3340 }, { - "epoch": 0.35, - "grad_norm": 2.840010075056747, - "learning_rate": 7.525538721953915e-06, - "loss": 0.7238, + "epoch": 0.24, + "grad_norm": 1.6090438517733556, + "learning_rate": 8.917077358932473e-06, + "loss": 0.5696, "step": 3341 }, { - "epoch": 0.35, - "grad_norm": 1.1576704441730468, - "learning_rate": 7.524067721793309e-06, - "loss": 0.6221, + "epoch": 0.24, + "grad_norm": 1.5445909479200173, + "learning_rate": 8.916363050108879e-06, + "loss": 0.57, "step": 3342 }, { - "epoch": 0.35, - "grad_norm": 2.6260852715338414, - "learning_rate": 7.522596428393809e-06, - "loss": 0.6732, + "epoch": 0.24, + "grad_norm": 1.5366314102451488, + "learning_rate": 8.915648534409715e-06, + "loss": 0.63, "step": 3343 }, { - "epoch": 0.35, - "grad_norm": 3.35119357031428, - "learning_rate": 7.521124841926348e-06, - "loss": 0.6466, + "epoch": 0.24, + "grad_norm": 1.5809792752028764, + "learning_rate": 8.91493381187273e-06, + "loss": 0.5486, "step": 3344 }, { - "epoch": 0.35, - "grad_norm": 2.410888681828178, - "learning_rate": 7.519652962561894e-06, - "loss": 0.7642, + "epoch": 0.24, + "grad_norm": 1.8303657366632156, + "learning_rate": 8.914218882535675e-06, + "loss": 0.5413, "step": 3345 }, { - "epoch": 0.35, - "grad_norm": 3.7276855469070074, - "learning_rate": 7.5181807904714385e-06, - "loss": 0.6249, + "epoch": 0.24, + "grad_norm": 2.12940205536627, + "learning_rate": 8.913503746436314e-06, + "loss": 0.5493, "step": 3346 }, { - "epoch": 0.35, - "grad_norm": 3.415586524281074, - "learning_rate": 7.516708325826021e-06, - "loss": 0.5827, + "epoch": 0.24, + "grad_norm": 1.651953553757289, + "learning_rate": 8.912788403612425e-06, + "loss": 0.5587, "step": 3347 }, { - "epoch": 0.35, - "grad_norm": 2.8263139482195503, - "learning_rate": 7.515235568796704e-06, - "loss": 0.662, + "epoch": 0.24, + "grad_norm": 1.836977329857448, + "learning_rate": 8.912072854101794e-06, + "loss": 0.5836, "step": 3348 }, { - "epoch": 0.35, - "grad_norm": 2.664326847472684, - "learning_rate": 7.513762519554588e-06, - "loss": 0.5015, + "epoch": 0.24, + "grad_norm": 1.8057506398122625, + "learning_rate": 8.91135709794222e-06, + "loss": 0.5843, "step": 3349 }, { - "epoch": 0.35, - "grad_norm": 2.618966692292694, - "learning_rate": 7.51228917827081e-06, - "loss": 0.6738, + "epoch": 0.24, + "grad_norm": 1.9644616700122761, + "learning_rate": 8.91064113517151e-06, + "loss": 0.5307, "step": 3350 }, { - "epoch": 0.35, - "grad_norm": 3.633158245759139, - "learning_rate": 7.510815545116539e-06, - "loss": 0.6204, + "epoch": 0.24, + "grad_norm": 1.5492554918806145, + "learning_rate": 8.909924965827485e-06, + "loss": 0.5292, "step": 3351 }, { - "epoch": 0.35, - "grad_norm": 2.4185537158238395, - "learning_rate": 7.509341620262976e-06, - "loss": 0.5918, + "epoch": 0.24, + "grad_norm": 1.7878326015603088, + "learning_rate": 8.909208589947973e-06, + "loss": 0.5877, "step": 3352 }, { - "epoch": 0.35, - "grad_norm": 2.4156711101816497, - "learning_rate": 7.507867403881356e-06, - "loss": 0.7105, + "epoch": 0.24, + "grad_norm": 1.474988075861754, + "learning_rate": 8.908492007570819e-06, + "loss": 0.5416, "step": 3353 }, { - "epoch": 0.35, - "grad_norm": 8.148153998559598, - "learning_rate": 7.506392896142951e-06, - "loss": 0.6219, + "epoch": 0.24, + "grad_norm": 1.789721349190109, + "learning_rate": 8.907775218733871e-06, + "loss": 0.6279, "step": 3354 }, { - "epoch": 0.35, - "grad_norm": 3.6450108914306396, - "learning_rate": 7.5049180972190646e-06, - "loss": 0.6179, + "epoch": 0.24, + "grad_norm": 1.467179814555534, + "learning_rate": 8.907058223474996e-06, + "loss": 0.6018, "step": 3355 }, { - "epoch": 0.35, - "grad_norm": 2.794686070537919, - "learning_rate": 7.503443007281035e-06, - "loss": 0.662, + "epoch": 0.24, + "grad_norm": 1.670231345152266, + "learning_rate": 8.906341021832066e-06, + "loss": 0.5894, "step": 3356 }, { - "epoch": 0.35, - "grad_norm": 3.1106881667918755, - "learning_rate": 7.501967626500231e-06, - "loss": 0.695, + "epoch": 0.24, + "grad_norm": 1.884425969363166, + "learning_rate": 8.905623613842969e-06, + "loss": 0.6718, "step": 3357 }, { - "epoch": 0.35, - "grad_norm": 3.700458795600681, - "learning_rate": 7.500491955048063e-06, - "loss": 0.6794, + "epoch": 0.24, + "grad_norm": 1.7952880177154242, + "learning_rate": 8.904905999545597e-06, + "loss": 0.5906, "step": 3358 }, { - "epoch": 0.35, - "grad_norm": 2.419751161900197, - "learning_rate": 7.499015993095968e-06, - "loss": 0.6671, + "epoch": 0.24, + "grad_norm": 1.8631934855750314, + "learning_rate": 8.904188178977858e-06, + "loss": 0.5935, "step": 3359 }, { - "epoch": 0.35, - "grad_norm": 2.8449869838184343, - "learning_rate": 7.497539740815419e-06, - "loss": 0.7413, + "epoch": 0.24, + "grad_norm": 1.567379934753429, + "learning_rate": 8.90347015217767e-06, + "loss": 0.6269, "step": 3360 }, { - "epoch": 0.35, - "grad_norm": 3.4184394602863226, - "learning_rate": 7.4960631983779205e-06, - "loss": 0.6781, + "epoch": 0.24, + "grad_norm": 1.6529429353106653, + "learning_rate": 8.902751919182963e-06, + "loss": 0.5607, "step": 3361 }, { - "epoch": 0.35, - "grad_norm": 2.796963292810003, - "learning_rate": 7.494586365955017e-06, - "loss": 0.667, + "epoch": 0.24, + "grad_norm": 1.6019177893335002, + "learning_rate": 8.902033480031675e-06, + "loss": 0.5891, "step": 3362 }, { - "epoch": 0.35, - "grad_norm": 3.0483662745104665, - "learning_rate": 7.49310924371828e-06, - "loss": 0.715, + "epoch": 0.24, + "grad_norm": 0.9561059703314159, + "learning_rate": 8.901314834761756e-06, + "loss": 0.459, "step": 3363 }, { - "epoch": 0.35, - "grad_norm": 3.5907131005662767, - "learning_rate": 7.491631831839318e-06, - "loss": 0.7097, + "epoch": 0.24, + "grad_norm": 1.5701810338608446, + "learning_rate": 8.90059598341117e-06, + "loss": 0.6448, "step": 3364 }, { - "epoch": 0.35, - "grad_norm": 3.175466104453125, - "learning_rate": 7.490154130489773e-06, - "loss": 0.6217, + "epoch": 0.24, + "grad_norm": 1.7579357258391803, + "learning_rate": 8.899876926017884e-06, + "loss": 0.5768, "step": 3365 }, { - "epoch": 0.35, - "grad_norm": 3.423899220213644, - "learning_rate": 7.488676139841318e-06, - "loss": 0.6247, + "epoch": 0.24, + "grad_norm": 1.8822086995804015, + "learning_rate": 8.899157662619887e-06, + "loss": 0.4743, "step": 3366 }, { - "epoch": 0.35, - "grad_norm": 2.8572229700965646, - "learning_rate": 7.487197860065664e-06, - "loss": 0.613, + "epoch": 0.24, + "grad_norm": 1.640191817878203, + "learning_rate": 8.898438193255168e-06, + "loss": 0.5748, "step": 3367 }, { - "epoch": 0.35, - "grad_norm": 2.8587163319916042, - "learning_rate": 7.485719291334551e-06, - "loss": 0.6576, + "epoch": 0.24, + "grad_norm": 2.3754973119027984, + "learning_rate": 8.897718517961734e-06, + "loss": 0.5376, "step": 3368 }, { - "epoch": 0.35, - "grad_norm": 2.3991473483168626, - "learning_rate": 7.484240433819758e-06, - "loss": 0.66, + "epoch": 0.24, + "grad_norm": 2.795766066676986, + "learning_rate": 8.896998636777602e-06, + "loss": 0.6181, "step": 3369 }, { - "epoch": 0.35, - "grad_norm": 5.965531907514714, - "learning_rate": 7.482761287693092e-06, - "loss": 0.6466, + "epoch": 0.24, + "grad_norm": 1.756328554450575, + "learning_rate": 8.896278549740796e-06, + "loss": 0.5956, "step": 3370 }, { - "epoch": 0.35, - "grad_norm": 3.048642664328739, - "learning_rate": 7.481281853126397e-06, - "loss": 0.715, + "epoch": 0.24, + "grad_norm": 1.875577226567001, + "learning_rate": 8.895558256889355e-06, + "loss": 0.5247, "step": 3371 }, { - "epoch": 0.35, - "grad_norm": 2.6309407059230296, - "learning_rate": 7.479802130291548e-06, - "loss": 0.6181, + "epoch": 0.24, + "grad_norm": 1.5920212481848186, + "learning_rate": 8.894837758261327e-06, + "loss": 0.5351, "step": 3372 }, { - "epoch": 0.35, - "grad_norm": 3.222323555003956, - "learning_rate": 7.478322119360457e-06, - "loss": 0.7331, + "epoch": 0.24, + "grad_norm": 1.7383617817183121, + "learning_rate": 8.89411705389477e-06, + "loss": 0.5822, "step": 3373 }, { - "epoch": 0.36, - "grad_norm": 2.2573259131196384, - "learning_rate": 7.476841820505065e-06, - "loss": 0.6684, + "epoch": 0.24, + "grad_norm": 0.9004882643421701, + "learning_rate": 8.893396143827757e-06, + "loss": 0.465, "step": 3374 }, { - "epoch": 0.36, - "grad_norm": 3.072317633131639, - "learning_rate": 7.475361233897352e-06, - "loss": 0.6969, + "epoch": 0.24, + "grad_norm": 1.973180669785003, + "learning_rate": 8.892675028098367e-06, + "loss": 0.6266, "step": 3375 }, { - "epoch": 0.36, - "grad_norm": 3.046601351124791, - "learning_rate": 7.473880359709324e-06, - "loss": 0.6585, + "epoch": 0.24, + "grad_norm": 1.9163886443697267, + "learning_rate": 8.891953706744691e-06, + "loss": 0.5053, "step": 3376 }, { - "epoch": 0.36, - "grad_norm": 2.4729492389743917, - "learning_rate": 7.472399198113029e-06, - "loss": 0.677, + "epoch": 0.24, + "grad_norm": 1.8373515300485945, + "learning_rate": 8.891232179804833e-06, + "loss": 0.528, "step": 3377 }, { - "epoch": 0.36, - "grad_norm": 3.2903365979880492, - "learning_rate": 7.4709177492805405e-06, - "loss": 0.6252, + "epoch": 0.24, + "grad_norm": 0.7884193753225434, + "learning_rate": 8.890510447316907e-06, + "loss": 0.4567, "step": 3378 }, { - "epoch": 0.36, - "grad_norm": 2.9983558599680125, - "learning_rate": 7.46943601338397e-06, - "loss": 0.7068, + "epoch": 0.24, + "grad_norm": 2.0303612042905983, + "learning_rate": 8.889788509319034e-06, + "loss": 0.645, "step": 3379 }, { - "epoch": 0.36, - "grad_norm": 3.0004226327693235, - "learning_rate": 7.4679539905954655e-06, - "loss": 0.6208, + "epoch": 0.24, + "grad_norm": 1.7819371578531362, + "learning_rate": 8.889066365849352e-06, + "loss": 0.5679, "step": 3380 }, { - "epoch": 0.36, - "grad_norm": 2.898934649269225, - "learning_rate": 7.4664716810871975e-06, - "loss": 0.6644, + "epoch": 0.24, + "grad_norm": 0.812632882646329, + "learning_rate": 8.888344016946008e-06, + "loss": 0.4913, "step": 3381 }, { - "epoch": 0.36, - "grad_norm": 3.194372324636005, - "learning_rate": 7.464989085031381e-06, - "loss": 0.6673, + "epoch": 0.24, + "grad_norm": 1.6883289673978974, + "learning_rate": 8.887621462647156e-06, + "loss": 0.5341, "step": 3382 }, { - "epoch": 0.36, - "grad_norm": 1.1182459004852183, - "learning_rate": 7.463506202600257e-06, - "loss": 0.5741, + "epoch": 0.24, + "grad_norm": 2.1197389049767414, + "learning_rate": 8.886898702990965e-06, + "loss": 0.5816, "step": 3383 }, { - "epoch": 0.36, - "grad_norm": 4.36761957421957, - "learning_rate": 7.462023033966104e-06, - "loss": 0.6101, + "epoch": 0.24, + "grad_norm": 1.7154665383166137, + "learning_rate": 8.886175738015617e-06, + "loss": 0.5666, "step": 3384 }, { - "epoch": 0.36, - "grad_norm": 2.2861432544796156, - "learning_rate": 7.4605395793012325e-06, - "loss": 0.6921, + "epoch": 0.24, + "grad_norm": 2.315699147210011, + "learning_rate": 8.885452567759293e-06, + "loss": 0.4998, "step": 3385 }, { - "epoch": 0.36, - "grad_norm": 2.7603235128046846, - "learning_rate": 7.459055838777984e-06, - "loss": 0.6734, + "epoch": 0.24, + "grad_norm": 1.6252700104780606, + "learning_rate": 8.884729192260205e-06, + "loss": 0.5501, "step": 3386 }, { - "epoch": 0.36, - "grad_norm": 3.9793695446983532, - "learning_rate": 7.457571812568738e-06, - "loss": 0.6595, + "epoch": 0.24, + "grad_norm": 1.6910362817364695, + "learning_rate": 8.884005611556553e-06, + "loss": 0.6264, "step": 3387 }, { - "epoch": 0.36, - "grad_norm": 4.311706185746525, - "learning_rate": 7.4560875008459035e-06, - "loss": 0.7186, + "epoch": 0.24, + "grad_norm": 2.4809789604562, + "learning_rate": 8.883281825686565e-06, + "loss": 0.5238, "step": 3388 }, { - "epoch": 0.36, - "grad_norm": 2.597103883565191, - "learning_rate": 7.454602903781921e-06, - "loss": 0.7305, + "epoch": 0.24, + "grad_norm": 1.6604285330805015, + "learning_rate": 8.882557834688473e-06, + "loss": 0.5357, "step": 3389 }, { - "epoch": 0.36, - "grad_norm": 2.501333529220921, - "learning_rate": 7.45311802154927e-06, - "loss": 0.6342, + "epoch": 0.24, + "grad_norm": 2.509300837483833, + "learning_rate": 8.881833638600521e-06, + "loss": 0.6907, "step": 3390 }, { - "epoch": 0.36, - "grad_norm": 2.5800190663717655, - "learning_rate": 7.451632854320459e-06, - "loss": 0.568, + "epoch": 0.24, + "grad_norm": 6.521583384248977, + "learning_rate": 8.881109237460962e-06, + "loss": 0.6326, "step": 3391 }, { - "epoch": 0.36, - "grad_norm": 2.3701799197834, - "learning_rate": 7.4501474022680265e-06, - "loss": 0.6836, + "epoch": 0.24, + "grad_norm": 1.4958846142070614, + "learning_rate": 8.880384631308062e-06, + "loss": 0.5146, "step": 3392 }, { - "epoch": 0.36, - "grad_norm": 2.3772231724274557, - "learning_rate": 7.4486616655645565e-06, - "loss": 0.619, + "epoch": 0.24, + "grad_norm": 2.0713138150248116, + "learning_rate": 8.879659820180097e-06, + "loss": 0.5547, "step": 3393 }, { - "epoch": 0.36, - "grad_norm": 2.8160484269096595, - "learning_rate": 7.447175644382648e-06, - "loss": 0.7025, + "epoch": 0.24, + "grad_norm": 1.4740386639486667, + "learning_rate": 8.878934804115355e-06, + "loss": 0.5566, "step": 3394 }, { - "epoch": 0.36, - "grad_norm": 3.7832742183500967, - "learning_rate": 7.445689338894949e-06, - "loss": 0.7469, + "epoch": 0.24, + "grad_norm": 0.8685163236541731, + "learning_rate": 8.878209583152134e-06, + "loss": 0.466, "step": 3395 }, { - "epoch": 0.36, - "grad_norm": 5.0059621903562, - "learning_rate": 7.444202749274133e-06, - "loss": 0.7397, + "epoch": 0.24, + "grad_norm": 1.7252380446520137, + "learning_rate": 8.87748415732874e-06, + "loss": 0.6246, "step": 3396 }, { - "epoch": 0.36, - "grad_norm": 3.892558568857166, - "learning_rate": 7.442715875692908e-06, - "loss": 0.6436, + "epoch": 0.24, + "grad_norm": 0.8148128572497462, + "learning_rate": 8.876758526683494e-06, + "loss": 0.4801, "step": 3397 }, { - "epoch": 0.36, - "grad_norm": 2.8280194358791824, - "learning_rate": 7.4412287183240115e-06, - "loss": 0.6798, + "epoch": 0.24, + "grad_norm": 2.0870913257741712, + "learning_rate": 8.876032691254725e-06, + "loss": 0.6095, "step": 3398 }, { - "epoch": 0.36, - "grad_norm": 2.837303017856792, - "learning_rate": 7.43974127734022e-06, - "loss": 0.6466, + "epoch": 0.24, + "grad_norm": 8.13031208517316, + "learning_rate": 8.875306651080778e-06, + "loss": 0.5549, "step": 3399 }, { - "epoch": 0.36, - "grad_norm": 2.734694065110121, - "learning_rate": 7.4382535529143395e-06, - "loss": 0.6031, + "epoch": 0.24, + "grad_norm": 3.569192650303241, + "learning_rate": 8.874580406200001e-06, + "loss": 0.5437, "step": 3400 }, { - "epoch": 0.36, - "grad_norm": 3.959288671850776, - "learning_rate": 7.43676554521921e-06, - "loss": 0.6906, + "epoch": 0.24, + "grad_norm": 1.486524647574541, + "learning_rate": 8.87385395665076e-06, + "loss": 0.5438, "step": 3401 }, { - "epoch": 0.36, - "grad_norm": 6.198201079133412, - "learning_rate": 7.435277254427704e-06, - "loss": 0.6888, + "epoch": 0.24, + "grad_norm": 1.6504746906589784, + "learning_rate": 8.873127302471425e-06, + "loss": 0.5285, "step": 3402 }, { - "epoch": 0.36, - "grad_norm": 4.386419840965464, - "learning_rate": 7.4337886807127235e-06, - "loss": 0.6252, + "epoch": 0.24, + "grad_norm": 0.8758467923005988, + "learning_rate": 8.872400443700383e-06, + "loss": 0.4774, "step": 3403 }, { - "epoch": 0.36, - "grad_norm": 4.1534089316760054, - "learning_rate": 7.4322998242472135e-06, - "loss": 0.6707, + "epoch": 0.24, + "grad_norm": 1.8882105758588144, + "learning_rate": 8.871673380376026e-06, + "loss": 0.6176, "step": 3404 }, { - "epoch": 0.36, - "grad_norm": 2.7988368901968124, - "learning_rate": 7.430810685204137e-06, - "loss": 0.6584, + "epoch": 0.24, + "grad_norm": 1.935302769428423, + "learning_rate": 8.870946112536763e-06, + "loss": 0.5188, "step": 3405 }, { - "epoch": 0.36, - "grad_norm": 3.919583307019518, - "learning_rate": 7.4293212637565045e-06, - "loss": 0.6966, + "epoch": 0.24, + "grad_norm": 1.861928847353188, + "learning_rate": 8.870218640221011e-06, + "loss": 0.5767, "step": 3406 }, { - "epoch": 0.36, - "grad_norm": 2.508307648411482, - "learning_rate": 7.427831560077349e-06, - "loss": 0.6444, + "epoch": 0.24, + "grad_norm": 1.8935851587127654, + "learning_rate": 8.869490963467195e-06, + "loss": 0.5104, "step": 3407 }, { - "epoch": 0.36, - "grad_norm": 1.3042696713402557, - "learning_rate": 7.426341574339741e-06, - "loss": 0.5891, + "epoch": 0.24, + "grad_norm": 1.8484801924340701, + "learning_rate": 8.868763082313755e-06, + "loss": 0.57, "step": 3408 }, { - "epoch": 0.36, - "grad_norm": 3.582107451003599, - "learning_rate": 7.424851306716783e-06, - "loss": 0.6359, + "epoch": 0.24, + "grad_norm": 0.7996558087023813, + "learning_rate": 8.86803499679914e-06, + "loss": 0.4849, "step": 3409 }, { - "epoch": 0.36, - "grad_norm": 2.904925684194061, - "learning_rate": 7.42336075738161e-06, - "loss": 0.6671, + "epoch": 0.24, + "grad_norm": 1.6713140938385975, + "learning_rate": 8.86730670696181e-06, + "loss": 0.5831, "step": 3410 }, { - "epoch": 0.36, - "grad_norm": 3.5167258077031267, - "learning_rate": 7.421869926507389e-06, - "loss": 0.7021, + "epoch": 0.24, + "grad_norm": 2.017440219714671, + "learning_rate": 8.866578212840234e-06, + "loss": 0.5203, "step": 3411 }, { - "epoch": 0.36, - "grad_norm": 4.358687817951523, - "learning_rate": 7.420378814267322e-06, - "loss": 0.7419, + "epoch": 0.24, + "grad_norm": 1.9987851957854041, + "learning_rate": 8.865849514472897e-06, + "loss": 0.5589, "step": 3412 }, { - "epoch": 0.36, - "grad_norm": 3.152148471276924, - "learning_rate": 7.41888742083464e-06, - "loss": 0.6744, + "epoch": 0.24, + "grad_norm": 2.0285033662316905, + "learning_rate": 8.865120611898287e-06, + "loss": 0.6466, "step": 3413 }, { - "epoch": 0.36, - "grad_norm": 2.3915603282771976, - "learning_rate": 7.417395746382608e-06, - "loss": 0.6771, + "epoch": 0.24, + "grad_norm": 1.7931993978893772, + "learning_rate": 8.864391505154912e-06, + "loss": 0.5257, "step": 3414 }, { - "epoch": 0.36, - "grad_norm": 3.7148070409783536, - "learning_rate": 7.415903791084529e-06, - "loss": 0.7244, + "epoch": 0.24, + "grad_norm": 0.8831048325066075, + "learning_rate": 8.863662194281283e-06, + "loss": 0.5023, "step": 3415 }, { - "epoch": 0.36, - "grad_norm": 3.752027193158189, - "learning_rate": 7.41441155511373e-06, - "loss": 0.6177, + "epoch": 0.24, + "grad_norm": 2.2260120966845296, + "learning_rate": 8.862932679315925e-06, + "loss": 0.6673, "step": 3416 }, { - "epoch": 0.36, - "grad_norm": 2.62863853424837, - "learning_rate": 7.412919038643577e-06, - "loss": 0.6457, + "epoch": 0.24, + "grad_norm": 1.9230770384485054, + "learning_rate": 8.862202960297373e-06, + "loss": 0.5856, "step": 3417 }, { - "epoch": 0.36, - "grad_norm": 2.5794611356979993, - "learning_rate": 7.411426241847463e-06, - "loss": 0.6303, + "epoch": 0.24, + "grad_norm": 1.8525638865704859, + "learning_rate": 8.861473037264172e-06, + "loss": 0.5947, "step": 3418 }, { - "epoch": 0.36, - "grad_norm": 3.9676318300037527, - "learning_rate": 7.409933164898819e-06, - "loss": 0.6817, + "epoch": 0.24, + "grad_norm": 1.7987450902675497, + "learning_rate": 8.860742910254883e-06, + "loss": 0.5676, "step": 3419 }, { - "epoch": 0.36, - "grad_norm": 2.8550149054743, - "learning_rate": 7.408439807971108e-06, - "loss": 0.654, + "epoch": 0.24, + "grad_norm": 1.7501097369893384, + "learning_rate": 8.86001257930807e-06, + "loss": 0.6023, "step": 3420 }, { - "epoch": 0.36, - "grad_norm": 5.195120384669288, - "learning_rate": 7.406946171237822e-06, - "loss": 0.579, + "epoch": 0.24, + "grad_norm": 1.6669726175076547, + "learning_rate": 8.859282044462315e-06, + "loss": 0.5478, "step": 3421 }, { - "epoch": 0.36, - "grad_norm": 3.0078016488240107, - "learning_rate": 7.4054522548724874e-06, - "loss": 0.6311, + "epoch": 0.24, + "grad_norm": 1.7995837146222344, + "learning_rate": 8.858551305756205e-06, + "loss": 0.6506, "step": 3422 }, { - "epoch": 0.36, - "grad_norm": 2.4860091683873784, - "learning_rate": 7.403958059048662e-06, - "loss": 0.6168, + "epoch": 0.24, + "grad_norm": 1.9642765276053822, + "learning_rate": 8.85782036322834e-06, + "loss": 0.492, "step": 3423 }, { - "epoch": 0.36, - "grad_norm": 2.570809434838443, - "learning_rate": 7.40246358393994e-06, - "loss": 0.591, + "epoch": 0.24, + "grad_norm": 0.7977435079913767, + "learning_rate": 8.85708921691733e-06, + "loss": 0.4886, "step": 3424 }, { - "epoch": 0.36, - "grad_norm": 3.7959371110166256, - "learning_rate": 7.4009688297199436e-06, - "loss": 0.7263, + "epoch": 0.24, + "grad_norm": 6.70194082583396, + "learning_rate": 8.8563578668618e-06, + "loss": 0.5758, "step": 3425 }, { - "epoch": 0.36, - "grad_norm": 2.774483241184963, - "learning_rate": 7.3994737965623285e-06, - "loss": 0.6566, + "epoch": 0.24, + "grad_norm": 1.6488801009177294, + "learning_rate": 8.855626313100379e-06, + "loss": 0.5226, "step": 3426 }, { - "epoch": 0.36, - "grad_norm": 2.455385395465524, - "learning_rate": 7.397978484640783e-06, - "loss": 0.5688, + "epoch": 0.24, + "grad_norm": 1.7511417443422959, + "learning_rate": 8.854894555671712e-06, + "loss": 0.5124, "step": 3427 }, { - "epoch": 0.36, - "grad_norm": 2.422389158395626, - "learning_rate": 7.396482894129031e-06, - "loss": 0.6501, + "epoch": 0.24, + "grad_norm": 1.7206348938506097, + "learning_rate": 8.854162594614452e-06, + "loss": 0.5804, "step": 3428 }, { - "epoch": 0.36, - "grad_norm": 2.9579176252075983, - "learning_rate": 7.3949870252008215e-06, - "loss": 0.5639, + "epoch": 0.24, + "grad_norm": 3.3563531547394385, + "learning_rate": 8.853430429967264e-06, + "loss": 0.6214, "step": 3429 }, { - "epoch": 0.36, - "grad_norm": 3.2151993534590684, - "learning_rate": 7.393490878029945e-06, - "loss": 0.6368, + "epoch": 0.24, + "grad_norm": 3.9641546307059965, + "learning_rate": 8.852698061768824e-06, + "loss": 0.6669, "step": 3430 }, { - "epoch": 0.36, - "grad_norm": 2.4041060387118276, - "learning_rate": 7.391994452790217e-06, - "loss": 0.7303, + "epoch": 0.24, + "grad_norm": 0.7465698912055478, + "learning_rate": 8.851965490057817e-06, + "loss": 0.4389, "step": 3431 }, { - "epoch": 0.36, - "grad_norm": 2.7465187882412563, - "learning_rate": 7.390497749655487e-06, - "loss": 0.6067, + "epoch": 0.24, + "grad_norm": 1.9743001609983004, + "learning_rate": 8.851232714872941e-06, + "loss": 0.5189, "step": 3432 }, { - "epoch": 0.36, - "grad_norm": 2.714071334159705, - "learning_rate": 7.389000768799638e-06, - "loss": 0.6604, + "epoch": 0.24, + "grad_norm": 2.0046359008536094, + "learning_rate": 8.850499736252905e-06, + "loss": 0.5376, "step": 3433 }, { - "epoch": 0.36, - "grad_norm": 2.9662799463771505, - "learning_rate": 7.387503510396586e-06, - "loss": 0.6275, + "epoch": 0.24, + "grad_norm": 1.761337934695754, + "learning_rate": 8.849766554236424e-06, + "loss": 0.6417, "step": 3434 }, { - "epoch": 0.36, - "grad_norm": 2.3746212916638507, - "learning_rate": 7.386005974620278e-06, - "loss": 0.7533, + "epoch": 0.24, + "grad_norm": 2.2105193602497772, + "learning_rate": 8.849033168862227e-06, + "loss": 0.4733, "step": 3435 }, { - "epoch": 0.36, - "grad_norm": 3.6759513228513154, - "learning_rate": 7.384508161644694e-06, - "loss": 0.6726, + "epoch": 0.24, + "grad_norm": 1.6234712266133848, + "learning_rate": 8.848299580169058e-06, + "loss": 0.5681, "step": 3436 }, { - "epoch": 0.36, - "grad_norm": 4.2759734265835565, - "learning_rate": 7.383010071643844e-06, - "loss": 0.6219, + "epoch": 0.24, + "grad_norm": 2.0103743771062934, + "learning_rate": 8.847565788195664e-06, + "loss": 0.504, "step": 3437 }, { - "epoch": 0.36, - "grad_norm": 1.0676737541014298, - "learning_rate": 7.381511704791771e-06, - "loss": 0.5884, + "epoch": 0.24, + "grad_norm": 1.655915699849422, + "learning_rate": 8.84683179298081e-06, + "loss": 0.581, "step": 3438 }, { - "epoch": 0.36, - "grad_norm": 3.292189795768927, - "learning_rate": 7.380013061262557e-06, - "loss": 0.5909, + "epoch": 0.24, + "grad_norm": 1.9135385025949316, + "learning_rate": 8.846097594563263e-06, + "loss": 0.627, "step": 3439 }, { - "epoch": 0.36, - "grad_norm": 3.097298037082653, - "learning_rate": 7.3785141412303e-06, - "loss": 0.6163, + "epoch": 0.24, + "grad_norm": 1.7086543023647893, + "learning_rate": 8.84536319298181e-06, + "loss": 0.6422, "step": 3440 }, { - "epoch": 0.36, - "grad_norm": 2.816854958679348, - "learning_rate": 7.37701494486915e-06, - "loss": 0.6845, + "epoch": 0.24, + "grad_norm": 1.785381852632581, + "learning_rate": 8.844628588275244e-06, + "loss": 0.5677, "step": 3441 }, { - "epoch": 0.36, - "grad_norm": 4.788505661813447, - "learning_rate": 7.375515472353272e-06, - "loss": 0.6201, + "epoch": 0.24, + "grad_norm": 0.7914085017741087, + "learning_rate": 8.84389378048237e-06, + "loss": 0.4542, "step": 3442 }, { - "epoch": 0.36, - "grad_norm": 4.129127828825236, - "learning_rate": 7.374015723856873e-06, - "loss": 0.6793, + "epoch": 0.24, + "grad_norm": 1.7858583924554032, + "learning_rate": 8.843158769641997e-06, + "loss": 0.5783, "step": 3443 }, { - "epoch": 0.36, - "grad_norm": 2.3574849469565544, - "learning_rate": 7.372515699554191e-06, - "loss": 0.6221, + "epoch": 0.24, + "grad_norm": 1.8508121264111628, + "learning_rate": 8.842423555792959e-06, + "loss": 0.594, "step": 3444 }, { - "epoch": 0.36, - "grad_norm": 3.3812374504332805, - "learning_rate": 7.371015399619494e-06, - "loss": 0.6603, + "epoch": 0.24, + "grad_norm": 1.7288205426254026, + "learning_rate": 8.841688138974087e-06, + "loss": 0.5837, "step": 3445 }, { - "epoch": 0.36, - "grad_norm": 3.0367925120459303, - "learning_rate": 7.369514824227082e-06, - "loss": 0.6616, + "epoch": 0.24, + "grad_norm": 2.455679525334658, + "learning_rate": 8.840952519224232e-06, + "loss": 0.5535, "step": 3446 }, { - "epoch": 0.36, - "grad_norm": 2.6543688097654665, - "learning_rate": 7.368013973551286e-06, - "loss": 0.7013, + "epoch": 0.24, + "grad_norm": 1.7682201070459607, + "learning_rate": 8.84021669658225e-06, + "loss": 0.5546, "step": 3447 }, { - "epoch": 0.36, - "grad_norm": 2.1635818274199434, - "learning_rate": 7.366512847766472e-06, - "loss": 0.6885, + "epoch": 0.24, + "grad_norm": 1.6579213658951488, + "learning_rate": 8.839480671087007e-06, + "loss": 0.5379, "step": 3448 }, { - "epoch": 0.36, - "grad_norm": 3.085467390645828, - "learning_rate": 7.365011447047036e-06, - "loss": 0.7165, + "epoch": 0.24, + "grad_norm": 0.8245617895428267, + "learning_rate": 8.838744442777387e-06, + "loss": 0.4598, "step": 3449 }, { - "epoch": 0.36, - "grad_norm": 2.283937568641037, - "learning_rate": 7.363509771567408e-06, - "loss": 0.6338, + "epoch": 0.24, + "grad_norm": 1.5934922178449806, + "learning_rate": 8.838008011692278e-06, + "loss": 0.5677, "step": 3450 }, { - "epoch": 0.36, - "grad_norm": 3.1768009624928166, - "learning_rate": 7.362007821502045e-06, - "loss": 0.747, + "epoch": 0.24, + "grad_norm": 1.75936575272406, + "learning_rate": 8.83727137787058e-06, + "loss": 0.5856, "step": 3451 }, { - "epoch": 0.36, - "grad_norm": 3.0324905424749597, - "learning_rate": 7.360505597025442e-06, - "loss": 0.685, + "epoch": 0.24, + "grad_norm": 0.7208201357000927, + "learning_rate": 8.836534541351207e-06, + "loss": 0.4484, "step": 3452 }, { - "epoch": 0.36, - "grad_norm": 2.983148422243415, - "learning_rate": 7.359003098312123e-06, - "loss": 0.6152, + "epoch": 0.25, + "grad_norm": 1.6980040234053784, + "learning_rate": 8.835797502173077e-06, + "loss": 0.5711, "step": 3453 }, { - "epoch": 0.36, - "grad_norm": 2.9793497750002658, - "learning_rate": 7.357500325536644e-06, - "loss": 0.7456, + "epoch": 0.25, + "grad_norm": 1.8600968149400643, + "learning_rate": 8.835060260375128e-06, + "loss": 0.5784, "step": 3454 }, { - "epoch": 0.36, - "grad_norm": 2.941874609510829, - "learning_rate": 7.355997278873589e-06, - "loss": 0.6748, + "epoch": 0.25, + "grad_norm": 1.7849903875635742, + "learning_rate": 8.8343228159963e-06, + "loss": 0.6281, "step": 3455 }, { - "epoch": 0.36, - "grad_norm": 2.1486776506884153, - "learning_rate": 7.354493958497583e-06, - "loss": 0.661, + "epoch": 0.25, + "grad_norm": 1.8013249183948317, + "learning_rate": 8.833585169075549e-06, + "loss": 0.6454, "step": 3456 }, { - "epoch": 0.36, - "grad_norm": 1.0170095119117828, - "learning_rate": 7.3529903645832744e-06, - "loss": 0.5884, + "epoch": 0.25, + "grad_norm": 1.9324670045002184, + "learning_rate": 8.832847319651838e-06, + "loss": 0.5858, "step": 3457 }, { - "epoch": 0.36, - "grad_norm": 2.4036589735821483, - "learning_rate": 7.351486497305347e-06, - "loss": 0.5853, + "epoch": 0.25, + "grad_norm": 1.5633632684464063, + "learning_rate": 8.832109267764146e-06, + "loss": 0.5598, "step": 3458 }, { - "epoch": 0.36, - "grad_norm": 3.310188596068514, - "learning_rate": 7.349982356838515e-06, - "loss": 0.7171, + "epoch": 0.25, + "grad_norm": 1.5872701807507894, + "learning_rate": 8.831371013451456e-06, + "loss": 0.5538, "step": 3459 }, { - "epoch": 0.36, - "grad_norm": 2.942433337193868, - "learning_rate": 7.348477943357527e-06, - "loss": 0.6472, + "epoch": 0.25, + "grad_norm": 1.541561437277759, + "learning_rate": 8.830632556752768e-06, + "loss": 0.5574, "step": 3460 }, { - "epoch": 0.36, - "grad_norm": 4.969354980228407, - "learning_rate": 7.34697325703716e-06, - "loss": 0.6456, + "epoch": 0.25, + "grad_norm": 2.102746657500715, + "learning_rate": 8.829893897707087e-06, + "loss": 0.4949, "step": 3461 }, { - "epoch": 0.36, - "grad_norm": 3.8519505648735737, - "learning_rate": 7.345468298052224e-06, - "loss": 0.6262, + "epoch": 0.25, + "grad_norm": 1.8557942315246043, + "learning_rate": 8.829155036353435e-06, + "loss": 0.5407, "step": 3462 }, { - "epoch": 0.36, - "grad_norm": 3.685187241926782, - "learning_rate": 7.343963066577563e-06, - "loss": 0.6571, + "epoch": 0.25, + "grad_norm": 1.5838015296039594, + "learning_rate": 8.828415972730835e-06, + "loss": 0.5147, "step": 3463 }, { - "epoch": 0.36, - "grad_norm": 3.437533087132148, - "learning_rate": 7.342457562788046e-06, - "loss": 0.6799, + "epoch": 0.25, + "grad_norm": 1.9397121146740615, + "learning_rate": 8.827676706878334e-06, + "loss": 0.5389, "step": 3464 }, { - "epoch": 0.36, - "grad_norm": 4.281370556976629, - "learning_rate": 7.340951786858583e-06, - "loss": 0.688, + "epoch": 0.25, + "grad_norm": 1.8032931813713424, + "learning_rate": 8.82693723883498e-06, + "loss": 0.572, "step": 3465 }, { - "epoch": 0.36, - "grad_norm": 3.1063629908728014, - "learning_rate": 7.339445738964106e-06, - "loss": 0.7737, + "epoch": 0.25, + "grad_norm": 2.1517770548393793, + "learning_rate": 8.826197568639832e-06, + "loss": 0.5909, "step": 3466 }, { - "epoch": 0.36, - "grad_norm": 2.0889025130238092, - "learning_rate": 7.337939419279588e-06, - "loss": 0.6248, + "epoch": 0.25, + "grad_norm": 0.9217569071861116, + "learning_rate": 8.825457696331964e-06, + "loss": 0.4632, "step": 3467 }, { - "epoch": 0.36, - "grad_norm": 2.815104069216375, - "learning_rate": 7.336432827980026e-06, - "loss": 0.6681, + "epoch": 0.25, + "grad_norm": 2.0452288666075975, + "learning_rate": 8.824717621950457e-06, + "loss": 0.5778, "step": 3468 }, { - "epoch": 0.37, - "grad_norm": 4.1118170144076265, - "learning_rate": 7.334925965240451e-06, - "loss": 0.6273, + "epoch": 0.25, + "grad_norm": 2.202214700435194, + "learning_rate": 8.823977345534407e-06, + "loss": 0.4832, "step": 3469 }, { - "epoch": 0.37, - "grad_norm": 3.2170988071407147, - "learning_rate": 7.333418831235928e-06, - "loss": 0.6412, + "epoch": 0.25, + "grad_norm": 1.539471232651271, + "learning_rate": 8.823236867122916e-06, + "loss": 0.5829, "step": 3470 }, { - "epoch": 0.37, - "grad_norm": 3.4121748554575984, - "learning_rate": 7.33191142614155e-06, - "loss": 0.698, + "epoch": 0.25, + "grad_norm": 0.8652601412332672, + "learning_rate": 8.822496186755098e-06, + "loss": 0.4558, "step": 3471 }, { - "epoch": 0.37, - "grad_norm": 3.0702451497078247, - "learning_rate": 7.330403750132443e-06, - "loss": 0.5974, + "epoch": 0.25, + "grad_norm": 1.897618882322111, + "learning_rate": 8.821755304470078e-06, + "loss": 0.5739, "step": 3472 }, { - "epoch": 0.37, - "grad_norm": 2.896556923363, - "learning_rate": 7.328895803383764e-06, - "loss": 0.6431, + "epoch": 0.25, + "grad_norm": 2.354083817730479, + "learning_rate": 8.821014220306995e-06, + "loss": 0.5728, "step": 3473 }, { - "epoch": 0.37, - "grad_norm": 2.849283645322886, - "learning_rate": 7.327387586070705e-06, - "loss": 0.6288, + "epoch": 0.25, + "grad_norm": 1.7150135664999324, + "learning_rate": 8.820272934304992e-06, + "loss": 0.5161, "step": 3474 }, { - "epoch": 0.37, - "grad_norm": 3.6979247535807063, - "learning_rate": 7.325879098368483e-06, - "loss": 0.6332, + "epoch": 0.25, + "grad_norm": 1.6994329328027284, + "learning_rate": 8.819531446503229e-06, + "loss": 0.5637, "step": 3475 }, { - "epoch": 0.37, - "grad_norm": 2.7475025526366377, - "learning_rate": 7.324370340452351e-06, - "loss": 0.6657, + "epoch": 0.25, + "grad_norm": 1.5721964388322847, + "learning_rate": 8.818789756940872e-06, + "loss": 0.5373, "step": 3476 }, { - "epoch": 0.37, - "grad_norm": 2.9659631728372777, - "learning_rate": 7.322861312497591e-06, - "loss": 0.6951, + "epoch": 0.25, + "grad_norm": 1.5352522368072474, + "learning_rate": 8.8180478656571e-06, + "loss": 0.5292, "step": 3477 }, { - "epoch": 0.37, - "grad_norm": 3.1844754621892837, - "learning_rate": 7.321352014679522e-06, - "loss": 0.5929, + "epoch": 0.25, + "grad_norm": 1.5997852640084447, + "learning_rate": 8.817305772691103e-06, + "loss": 0.6079, "step": 3478 }, { - "epoch": 0.37, - "grad_norm": 2.974837282969043, - "learning_rate": 7.319842447173482e-06, - "loss": 0.5501, + "epoch": 0.25, + "grad_norm": 1.4661214049391107, + "learning_rate": 8.81656347808208e-06, + "loss": 0.5085, "step": 3479 }, { - "epoch": 0.37, - "grad_norm": 2.892368554345106, - "learning_rate": 7.318332610154854e-06, - "loss": 0.7127, + "epoch": 0.25, + "grad_norm": 1.9624290405753892, + "learning_rate": 8.815820981869243e-06, + "loss": 0.5288, "step": 3480 }, { - "epoch": 0.37, - "grad_norm": 2.715197258419502, - "learning_rate": 7.3168225037990434e-06, - "loss": 0.6408, + "epoch": 0.25, + "grad_norm": 1.5262242644345902, + "learning_rate": 8.81507828409181e-06, + "loss": 0.5548, "step": 3481 }, { - "epoch": 0.37, - "grad_norm": 2.5710173525042554, - "learning_rate": 7.315312128281493e-06, - "loss": 0.6488, + "epoch": 0.25, + "grad_norm": 1.5870541444861637, + "learning_rate": 8.814335384789016e-06, + "loss": 0.535, "step": 3482 }, { - "epoch": 0.37, - "grad_norm": 2.0270385193056204, - "learning_rate": 7.313801483777674e-06, - "loss": 0.6552, + "epoch": 0.25, + "grad_norm": 1.5355892094361028, + "learning_rate": 8.813592284000101e-06, + "loss": 0.5335, "step": 3483 }, { - "epoch": 0.37, - "grad_norm": 4.205749249065878, - "learning_rate": 7.3122905704630845e-06, - "loss": 0.6876, + "epoch": 0.25, + "grad_norm": 1.7237255756449659, + "learning_rate": 8.812848981764321e-06, + "loss": 0.5499, "step": 3484 }, { - "epoch": 0.37, - "grad_norm": 3.688817976879616, - "learning_rate": 7.310779388513263e-06, - "loss": 0.6637, + "epoch": 0.25, + "grad_norm": 1.5740628164363135, + "learning_rate": 8.812105478120936e-06, + "loss": 0.4714, "step": 3485 }, { - "epoch": 0.37, - "grad_norm": 2.401643485510619, - "learning_rate": 7.309267938103769e-06, - "loss": 0.6614, + "epoch": 0.25, + "grad_norm": 2.1097888519167722, + "learning_rate": 8.811361773109224e-06, + "loss": 0.6267, "step": 3486 }, { - "epoch": 0.37, - "grad_norm": 2.4560485153621237, - "learning_rate": 7.307756219410205e-06, - "loss": 0.6329, + "epoch": 0.25, + "grad_norm": 2.1379227450921263, + "learning_rate": 8.810617866768469e-06, + "loss": 0.5612, "step": 3487 }, { - "epoch": 0.37, - "grad_norm": 2.5820481308855046, - "learning_rate": 7.306244232608191e-06, - "loss": 0.7006, + "epoch": 0.25, + "grad_norm": 1.50045944397671, + "learning_rate": 8.809873759137966e-06, + "loss": 0.6065, "step": 3488 }, { - "epoch": 0.37, - "grad_norm": 1.1020552524421927, - "learning_rate": 7.304731977873392e-06, - "loss": 0.5822, + "epoch": 0.25, + "grad_norm": 4.18048567606914, + "learning_rate": 8.80912945025702e-06, + "loss": 0.521, "step": 3489 }, { - "epoch": 0.37, - "grad_norm": 2.584430521631613, - "learning_rate": 7.303219455381491e-06, - "loss": 0.6619, + "epoch": 0.25, + "grad_norm": 1.7995178717734759, + "learning_rate": 8.80838494016495e-06, + "loss": 0.5408, "step": 3490 }, { - "epoch": 0.37, - "grad_norm": 2.298444021464788, - "learning_rate": 7.301706665308212e-06, - "loss": 0.7223, + "epoch": 0.25, + "grad_norm": 0.8077891822337105, + "learning_rate": 8.807640228901084e-06, + "loss": 0.4764, "step": 3491 }, { - "epoch": 0.37, - "grad_norm": 6.985437156669851, - "learning_rate": 7.300193607829308e-06, - "loss": 0.6624, + "epoch": 0.25, + "grad_norm": 1.582827847937871, + "learning_rate": 8.806895316504757e-06, + "loss": 0.5122, "step": 3492 }, { - "epoch": 0.37, - "grad_norm": 2.590700474555624, - "learning_rate": 7.298680283120558e-06, - "loss": 0.6532, + "epoch": 0.25, + "grad_norm": 0.7826633836294917, + "learning_rate": 8.806150203015322e-06, + "loss": 0.4571, "step": 3493 }, { - "epoch": 0.37, - "grad_norm": 2.3977866755703614, - "learning_rate": 7.29716669135778e-06, - "loss": 0.7138, + "epoch": 0.25, + "grad_norm": 1.9297796702987702, + "learning_rate": 8.805404888472135e-06, + "loss": 0.5722, "step": 3494 }, { - "epoch": 0.37, - "grad_norm": 2.7237583049566525, - "learning_rate": 7.295652832716814e-06, - "loss": 0.6091, + "epoch": 0.25, + "grad_norm": 2.946500502481895, + "learning_rate": 8.804659372914568e-06, + "loss": 0.6609, "step": 3495 }, { - "epoch": 0.37, - "grad_norm": 2.789323251837038, - "learning_rate": 7.294138707373539e-06, - "loss": 0.6027, + "epoch": 0.25, + "grad_norm": 1.737788881605149, + "learning_rate": 8.803913656382e-06, + "loss": 0.5031, "step": 3496 }, { - "epoch": 0.37, - "grad_norm": 2.1445610603977494, - "learning_rate": 7.29262431550386e-06, - "loss": 0.6358, + "epoch": 0.25, + "grad_norm": 1.5966975589760444, + "learning_rate": 8.803167738913824e-06, + "loss": 0.5698, "step": 3497 }, { - "epoch": 0.37, - "grad_norm": 2.7828156452167434, - "learning_rate": 7.2911096572837155e-06, - "loss": 0.7049, + "epoch": 0.25, + "grad_norm": 2.0462722042309056, + "learning_rate": 8.80242162054944e-06, + "loss": 0.495, "step": 3498 }, { - "epoch": 0.37, - "grad_norm": 3.062889194963662, - "learning_rate": 7.289594732889073e-06, - "loss": 0.6906, + "epoch": 0.25, + "grad_norm": 1.7553543656359716, + "learning_rate": 8.801675301328263e-06, + "loss": 0.5906, "step": 3499 }, { - "epoch": 0.37, - "grad_norm": 3.187017863959395, - "learning_rate": 7.288079542495936e-06, - "loss": 0.6046, + "epoch": 0.25, + "grad_norm": 1.7334826578400364, + "learning_rate": 8.800928781289715e-06, + "loss": 0.5813, "step": 3500 }, { - "epoch": 0.37, - "grad_norm": 2.80365868383857, - "learning_rate": 7.286564086280329e-06, - "loss": 0.6339, + "epoch": 0.25, + "grad_norm": 1.7819994045250607, + "learning_rate": 8.800182060473228e-06, + "loss": 0.6215, "step": 3501 }, { - "epoch": 0.37, - "grad_norm": 2.978700832429271, - "learning_rate": 7.285048364418319e-06, - "loss": 0.6488, + "epoch": 0.25, + "grad_norm": 1.6444762719282644, + "learning_rate": 8.799435138918248e-06, + "loss": 0.6026, "step": 3502 }, { - "epoch": 0.37, - "grad_norm": 2.9173805232519245, - "learning_rate": 7.283532377085992e-06, - "loss": 0.6956, + "epoch": 0.25, + "grad_norm": 2.0387462172174264, + "learning_rate": 8.798688016664231e-06, + "loss": 0.536, "step": 3503 }, { - "epoch": 0.37, - "grad_norm": 3.020587219160722, - "learning_rate": 7.282016124459477e-06, - "loss": 0.6508, + "epoch": 0.25, + "grad_norm": 1.574216448719919, + "learning_rate": 8.79794069375064e-06, + "loss": 0.5819, "step": 3504 }, { - "epoch": 0.37, - "grad_norm": 2.3101783919909056, - "learning_rate": 7.280499606714923e-06, - "loss": 0.6795, + "epoch": 0.25, + "grad_norm": 1.710748567411178, + "learning_rate": 8.797193170216953e-06, + "loss": 0.5713, "step": 3505 }, { - "epoch": 0.37, - "grad_norm": 3.9706549307214556, - "learning_rate": 7.27898282402852e-06, - "loss": 0.7841, + "epoch": 0.25, + "grad_norm": 1.530247772013323, + "learning_rate": 8.796445446102657e-06, + "loss": 0.5199, "step": 3506 }, { - "epoch": 0.37, - "grad_norm": 3.2123361694164423, - "learning_rate": 7.277465776576478e-06, - "loss": 0.6485, + "epoch": 0.25, + "grad_norm": 2.0611077545947203, + "learning_rate": 8.795697521447248e-06, + "loss": 0.5632, "step": 3507 }, { - "epoch": 0.37, - "grad_norm": 2.833849508292802, - "learning_rate": 7.275948464535045e-06, - "loss": 0.6073, + "epoch": 0.25, + "grad_norm": 2.0571110120031584, + "learning_rate": 8.794949396290233e-06, + "loss": 0.6028, "step": 3508 }, { - "epoch": 0.37, - "grad_norm": 2.190248056297931, - "learning_rate": 7.274430888080502e-06, - "loss": 0.6136, + "epoch": 0.25, + "grad_norm": 1.8216311390643178, + "learning_rate": 8.794201070671134e-06, + "loss": 0.5385, "step": 3509 }, { - "epoch": 0.37, - "grad_norm": 3.801011293418992, - "learning_rate": 7.27291304738915e-06, - "loss": 0.6847, + "epoch": 0.25, + "grad_norm": 1.6748061102813903, + "learning_rate": 8.793452544629475e-06, + "loss": 0.5395, "step": 3510 }, { - "epoch": 0.37, - "grad_norm": 6.725920278243489, - "learning_rate": 7.271394942637332e-06, - "loss": 0.6099, + "epoch": 0.25, + "grad_norm": 1.4779900339120047, + "learning_rate": 8.7927038182048e-06, + "loss": 0.531, "step": 3511 }, { - "epoch": 0.37, - "grad_norm": 2.3603811567927466, - "learning_rate": 7.269876574001414e-06, - "loss": 0.6546, + "epoch": 0.25, + "grad_norm": 2.4029747250691638, + "learning_rate": 8.791954891436658e-06, + "loss": 0.5418, "step": 3512 }, { - "epoch": 0.37, - "grad_norm": 2.775937881236346, - "learning_rate": 7.2683579416578e-06, - "loss": 0.6915, + "epoch": 0.25, + "grad_norm": 1.7349472507867285, + "learning_rate": 8.79120576436461e-06, + "loss": 0.5477, "step": 3513 }, { - "epoch": 0.37, - "grad_norm": 2.542726679655711, - "learning_rate": 7.266839045782914e-06, - "loss": 0.6684, + "epoch": 0.25, + "grad_norm": 1.5693731850250041, + "learning_rate": 8.790456437028228e-06, + "loss": 0.4997, "step": 3514 }, { - "epoch": 0.37, - "grad_norm": 2.7964307299094453, - "learning_rate": 7.265319886553223e-06, - "loss": 0.594, + "epoch": 0.25, + "grad_norm": 23.3374808625812, + "learning_rate": 8.78970690946709e-06, + "loss": 0.5701, "step": 3515 }, { - "epoch": 0.37, - "grad_norm": 2.37740356376484, - "learning_rate": 7.263800464145214e-06, - "loss": 0.7419, + "epoch": 0.25, + "grad_norm": 1.6067107721998015, + "learning_rate": 8.788957181720796e-06, + "loss": 0.5767, "step": 3516 }, { - "epoch": 0.37, - "grad_norm": 3.485584641652323, - "learning_rate": 7.262280778735412e-06, - "loss": 0.6703, + "epoch": 0.25, + "grad_norm": 3.434441181565452, + "learning_rate": 8.788207253828943e-06, + "loss": 0.5739, "step": 3517 }, { - "epoch": 0.37, - "grad_norm": 3.4839449059905907, - "learning_rate": 7.26076083050037e-06, - "loss": 0.675, + "epoch": 0.25, + "grad_norm": 1.7339622491702287, + "learning_rate": 8.787457125831146e-06, + "loss": 0.5323, "step": 3518 }, { - "epoch": 0.37, - "grad_norm": 2.12300936003989, - "learning_rate": 7.259240619616668e-06, - "loss": 0.6224, + "epoch": 0.25, + "grad_norm": 1.8539633730372977, + "learning_rate": 8.78670679776703e-06, + "loss": 0.5825, "step": 3519 }, { - "epoch": 0.37, - "grad_norm": 2.6719730804814863, - "learning_rate": 7.257720146260923e-06, - "loss": 0.659, + "epoch": 0.25, + "grad_norm": 1.6797740836557598, + "learning_rate": 8.78595626967623e-06, + "loss": 0.6299, "step": 3520 }, { - "epoch": 0.37, - "grad_norm": 3.73292327896022, - "learning_rate": 7.256199410609776e-06, - "loss": 0.6476, + "epoch": 0.25, + "grad_norm": 2.086605116187921, + "learning_rate": 8.785205541598391e-06, + "loss": 0.5388, "step": 3521 }, { - "epoch": 0.37, - "grad_norm": 4.636014633612924, - "learning_rate": 7.254678412839905e-06, - "loss": 0.6879, + "epoch": 0.25, + "grad_norm": 4.604162152847393, + "learning_rate": 8.784454613573172e-06, + "loss": 0.58, "step": 3522 }, { - "epoch": 0.37, - "grad_norm": 2.5306894259549075, - "learning_rate": 7.253157153128012e-06, - "loss": 0.6785, + "epoch": 0.25, + "grad_norm": 1.7765282804776734, + "learning_rate": 8.783703485640233e-06, + "loss": 0.5325, "step": 3523 }, { - "epoch": 0.37, - "grad_norm": 3.976569034282962, - "learning_rate": 7.251635631650838e-06, - "loss": 0.7064, + "epoch": 0.25, + "grad_norm": 1.5141759617941786, + "learning_rate": 8.782952157839258e-06, + "loss": 0.4905, "step": 3524 }, { - "epoch": 0.37, - "grad_norm": 2.45250759237423, - "learning_rate": 7.250113848585141e-06, - "loss": 0.6499, + "epoch": 0.25, + "grad_norm": 2.290043947055051, + "learning_rate": 8.78220063020993e-06, + "loss": 0.5032, "step": 3525 }, { - "epoch": 0.37, - "grad_norm": 2.060166639945123, - "learning_rate": 7.248591804107724e-06, - "loss": 0.6757, + "epoch": 0.25, + "grad_norm": 1.948274071610963, + "learning_rate": 8.781448902791949e-06, + "loss": 0.5588, "step": 3526 }, { - "epoch": 0.37, - "grad_norm": 5.716487897796609, - "learning_rate": 7.247069498395409e-06, - "loss": 0.6512, + "epoch": 0.25, + "grad_norm": 1.741558502112802, + "learning_rate": 8.780696975625023e-06, + "loss": 0.6184, "step": 3527 }, { - "epoch": 0.37, - "grad_norm": 3.4915603318297346, - "learning_rate": 7.245546931625057e-06, - "loss": 0.6518, + "epoch": 0.25, + "grad_norm": 1.7883370117449195, + "learning_rate": 8.779944848748874e-06, + "loss": 0.5751, "step": 3528 }, { - "epoch": 0.37, - "grad_norm": 2.657417413138783, - "learning_rate": 7.244024103973553e-06, - "loss": 0.6662, + "epoch": 0.25, + "grad_norm": 2.002023239896317, + "learning_rate": 8.779192522203229e-06, + "loss": 0.5615, "step": 3529 }, { - "epoch": 0.37, - "grad_norm": 4.6985543615408805, - "learning_rate": 7.242501015617815e-06, - "loss": 0.5643, + "epoch": 0.25, + "grad_norm": 1.555600383358493, + "learning_rate": 8.778439996027827e-06, + "loss": 0.5592, "step": 3530 }, { - "epoch": 0.37, - "grad_norm": 2.7512005261618566, - "learning_rate": 7.240977666734793e-06, - "loss": 0.6004, + "epoch": 0.25, + "grad_norm": 1.7099922262436646, + "learning_rate": 8.777687270262425e-06, + "loss": 0.6077, "step": 3531 }, { - "epoch": 0.37, - "grad_norm": 2.3062901989306015, - "learning_rate": 7.239454057501462e-06, - "loss": 0.6528, + "epoch": 0.25, + "grad_norm": 2.3038663002315953, + "learning_rate": 8.776934344946779e-06, + "loss": 0.5292, "step": 3532 }, { - "epoch": 0.37, - "grad_norm": 3.1690879827237803, - "learning_rate": 7.237930188094834e-06, - "loss": 0.6433, + "epoch": 0.25, + "grad_norm": 1.563421308569948, + "learning_rate": 8.776181220120662e-06, + "loss": 0.6121, "step": 3533 }, { - "epoch": 0.37, - "grad_norm": 2.189006985940273, - "learning_rate": 7.236406058691944e-06, - "loss": 0.6005, + "epoch": 0.25, + "grad_norm": 1.4620536538123339, + "learning_rate": 8.775427895823859e-06, + "loss": 0.635, "step": 3534 }, { - "epoch": 0.37, - "grad_norm": 2.5436760311571067, - "learning_rate": 7.234881669469864e-06, - "loss": 0.6815, + "epoch": 0.25, + "grad_norm": 1.0061452867765013, + "learning_rate": 8.774674372096161e-06, + "loss": 0.4557, "step": 3535 }, { - "epoch": 0.37, - "grad_norm": 2.5440441226370254, - "learning_rate": 7.233357020605692e-06, - "loss": 0.5973, + "epoch": 0.25, + "grad_norm": 2.4079815511394007, + "learning_rate": 8.773920648977371e-06, + "loss": 0.6528, "step": 3536 }, { - "epoch": 0.37, - "grad_norm": 2.8717435554951423, - "learning_rate": 7.2318321122765575e-06, - "loss": 0.7263, + "epoch": 0.25, + "grad_norm": 1.7633387585185842, + "learning_rate": 8.773166726507307e-06, + "loss": 0.5575, "step": 3537 }, { - "epoch": 0.37, - "grad_norm": 2.5676017996288674, - "learning_rate": 7.230306944659618e-06, - "loss": 0.6429, + "epoch": 0.25, + "grad_norm": 2.3141514005483037, + "learning_rate": 8.77241260472579e-06, + "loss": 0.676, "step": 3538 }, { - "epoch": 0.37, - "grad_norm": 3.320166184610786, - "learning_rate": 7.2287815179320665e-06, - "loss": 0.7412, + "epoch": 0.25, + "grad_norm": 5.767353373868913, + "learning_rate": 8.771658283672657e-06, + "loss": 0.6051, "step": 3539 }, { - "epoch": 0.37, - "grad_norm": 4.887680699145069, - "learning_rate": 7.227255832271122e-06, - "loss": 0.7, + "epoch": 0.25, + "grad_norm": 1.5544666038167252, + "learning_rate": 8.770903763387753e-06, + "loss": 0.6121, "step": 3540 }, { - "epoch": 0.37, - "grad_norm": 2.2913081303644938, - "learning_rate": 7.225729887854032e-06, - "loss": 0.6098, + "epoch": 0.25, + "grad_norm": 1.6045519539605706, + "learning_rate": 8.770149043910937e-06, + "loss": 0.5737, "step": 3541 }, { - "epoch": 0.37, - "grad_norm": 4.5364276369948415, - "learning_rate": 7.224203684858078e-06, - "loss": 0.639, + "epoch": 0.25, + "grad_norm": 0.8660916038448847, + "learning_rate": 8.76939412528207e-06, + "loss": 0.467, "step": 3542 }, { - "epoch": 0.37, - "grad_norm": 2.9611456139801065, - "learning_rate": 7.222677223460567e-06, - "loss": 0.626, + "epoch": 0.25, + "grad_norm": 2.052165391738364, + "learning_rate": 8.768639007541034e-06, + "loss": 0.5898, "step": 3543 }, { - "epoch": 0.37, - "grad_norm": 4.607908981424635, - "learning_rate": 7.221150503838844e-06, - "loss": 0.6371, + "epoch": 0.25, + "grad_norm": 2.312284320166707, + "learning_rate": 8.767883690727716e-06, + "loss": 0.5306, "step": 3544 }, { - "epoch": 0.37, - "grad_norm": 2.6691678732909114, - "learning_rate": 7.219623526170275e-06, - "loss": 0.6519, + "epoch": 0.25, + "grad_norm": 0.9091648374909655, + "learning_rate": 8.767128174882013e-06, + "loss": 0.4577, "step": 3545 }, { - "epoch": 0.37, - "grad_norm": 2.914049840880877, - "learning_rate": 7.218096290632263e-06, - "loss": 0.6737, + "epoch": 0.25, + "grad_norm": 1.6442467419450388, + "learning_rate": 8.766372460043837e-06, + "loss": 0.5471, "step": 3546 }, { - "epoch": 0.37, - "grad_norm": 4.551071513521762, - "learning_rate": 7.216568797402232e-06, - "loss": 0.616, + "epoch": 0.25, + "grad_norm": 1.8064950103547923, + "learning_rate": 8.765616546253105e-06, + "loss": 0.4484, "step": 3547 }, { - "epoch": 0.37, - "grad_norm": 2.5813083708314255, - "learning_rate": 7.2150410466576495e-06, - "loss": 0.7268, - "step": 3548 + "epoch": 0.25, + "grad_norm": 2.293471862718506, + "learning_rate": 8.764860433549747e-06, + "loss": 0.5535, + "step": 3548 }, { - "epoch": 0.37, - "grad_norm": 3.0795987810833583, - "learning_rate": 7.213513038575999e-06, - "loss": 0.6526, + "epoch": 0.25, + "grad_norm": 1.7258487168381427, + "learning_rate": 8.764104121973702e-06, + "loss": 0.6364, "step": 3549 }, { - "epoch": 0.37, - "grad_norm": 2.2721784087350216, - "learning_rate": 7.211984773334803e-06, - "loss": 0.6302, + "epoch": 0.25, + "grad_norm": 1.495264853913481, + "learning_rate": 8.763347611564925e-06, + "loss": 0.5826, "step": 3550 }, { - "epoch": 0.37, - "grad_norm": 1.928510250413681, - "learning_rate": 7.210456251111611e-06, - "loss": 0.6411, + "epoch": 0.25, + "grad_norm": 1.7052199262358334, + "learning_rate": 8.762590902363375e-06, + "loss": 0.6062, "step": 3551 }, { - "epoch": 0.37, - "grad_norm": 4.445810874079125, - "learning_rate": 7.208927472084e-06, - "loss": 0.6141, + "epoch": 0.25, + "grad_norm": 1.8488618109319745, + "learning_rate": 8.761833994409023e-06, + "loss": 0.604, "step": 3552 }, { - "epoch": 0.37, - "grad_norm": 2.3202477177734657, - "learning_rate": 7.207398436429581e-06, - "loss": 0.6578, + "epoch": 0.25, + "grad_norm": 2.3144500101480365, + "learning_rate": 8.761076887741855e-06, + "loss": 0.5915, "step": 3553 }, { - "epoch": 0.37, - "grad_norm": 4.305688026429538, - "learning_rate": 7.205869144325992e-06, - "loss": 0.6092, + "epoch": 0.25, + "grad_norm": 1.3787992968939957, + "learning_rate": 8.760319582401859e-06, + "loss": 0.5249, "step": 3554 }, { - "epoch": 0.37, - "grad_norm": 2.346775916923627, - "learning_rate": 7.204339595950904e-06, - "loss": 0.5894, + "epoch": 0.25, + "grad_norm": 1.9700810291058084, + "learning_rate": 8.759562078429043e-06, + "loss": 0.5436, "step": 3555 }, { - "epoch": 0.37, - "grad_norm": 3.8477133741441674, - "learning_rate": 7.202809791482013e-06, - "loss": 0.6427, + "epoch": 0.25, + "grad_norm": 1.8836617555997572, + "learning_rate": 8.758804375863417e-06, + "loss": 0.6626, "step": 3556 }, { - "epoch": 0.37, - "grad_norm": 3.235974425228947, - "learning_rate": 7.201279731097048e-06, - "loss": 0.6674, + "epoch": 0.25, + "grad_norm": 1.7456918223000262, + "learning_rate": 8.758046474745008e-06, + "loss": 0.5911, "step": 3557 }, { - "epoch": 0.37, - "grad_norm": 2.2493046168393445, - "learning_rate": 7.199749414973767e-06, - "loss": 0.6528, + "epoch": 0.25, + "grad_norm": 2.0569664889441714, + "learning_rate": 8.75728837511385e-06, + "loss": 0.6069, "step": 3558 }, { - "epoch": 0.37, - "grad_norm": 1.107170893614456, - "learning_rate": 7.1982188432899595e-06, - "loss": 0.5877, + "epoch": 0.25, + "grad_norm": 1.5936160525488972, + "learning_rate": 8.756530077009988e-06, + "loss": 0.4951, "step": 3559 }, { - "epoch": 0.37, - "grad_norm": 3.3599103332329485, - "learning_rate": 7.196688016223439e-06, - "loss": 0.7369, + "epoch": 0.25, + "grad_norm": 1.5244428736375484, + "learning_rate": 8.75577158047348e-06, + "loss": 0.5089, "step": 3560 }, { - "epoch": 0.37, - "grad_norm": 2.4363610701892657, - "learning_rate": 7.195156933952055e-06, - "loss": 0.6762, + "epoch": 0.25, + "grad_norm": 1.5430495511142288, + "learning_rate": 8.755012885544389e-06, + "loss": 0.5366, "step": 3561 }, { - "epoch": 0.37, - "grad_norm": 2.7535120357249556, - "learning_rate": 7.193625596653684e-06, - "loss": 0.6812, + "epoch": 0.25, + "grad_norm": 0.8838013558654617, + "learning_rate": 8.754253992262796e-06, + "loss": 0.4571, "step": 3562 }, { - "epoch": 0.37, - "grad_norm": 2.099885019771948, - "learning_rate": 7.1920940045062335e-06, - "loss": 0.6133, + "epoch": 0.25, + "grad_norm": 1.7312084227295266, + "learning_rate": 8.753494900668785e-06, + "loss": 0.5838, "step": 3563 }, { - "epoch": 0.38, - "grad_norm": 3.93938976849278, - "learning_rate": 7.1905621576876375e-06, - "loss": 0.6401, + "epoch": 0.25, + "grad_norm": 1.6012524798619687, + "learning_rate": 8.752735610802454e-06, + "loss": 0.6118, "step": 3564 }, { - "epoch": 0.38, - "grad_norm": 2.4121806622384656, - "learning_rate": 7.189030056375862e-06, - "loss": 0.629, + "epoch": 0.25, + "grad_norm": 1.8498373207413248, + "learning_rate": 8.751976122703913e-06, + "loss": 0.5768, "step": 3565 }, { - "epoch": 0.38, - "grad_norm": 2.832137503000308, - "learning_rate": 7.187497700748903e-06, - "loss": 0.7839, + "epoch": 0.25, + "grad_norm": 1.7427152124593832, + "learning_rate": 8.75121643641328e-06, + "loss": 0.5889, "step": 3566 }, { - "epoch": 0.38, - "grad_norm": 2.892305433301476, - "learning_rate": 7.185965090984783e-06, - "loss": 0.6009, + "epoch": 0.25, + "grad_norm": 1.7478268876946392, + "learning_rate": 8.750456551970684e-06, + "loss": 0.5825, "step": 3567 }, { - "epoch": 0.38, - "grad_norm": 2.701267405207593, - "learning_rate": 7.184432227261561e-06, - "loss": 0.7288, + "epoch": 0.25, + "grad_norm": 1.9879869218371424, + "learning_rate": 8.749696469416262e-06, + "loss": 0.5337, "step": 3568 }, { - "epoch": 0.38, - "grad_norm": 3.065332041666608, - "learning_rate": 7.182899109757314e-06, - "loss": 0.6107, + "epoch": 0.25, + "grad_norm": 1.824353563765505, + "learning_rate": 8.74893618879017e-06, + "loss": 0.5455, "step": 3569 }, { - "epoch": 0.38, - "grad_norm": 2.3541441748774177, - "learning_rate": 7.181365738650161e-06, - "loss": 0.6027, + "epoch": 0.25, + "grad_norm": 2.2599102801490574, + "learning_rate": 8.748175710132562e-06, + "loss": 0.52, "step": 3570 }, { - "epoch": 0.38, - "grad_norm": 2.271716533928777, - "learning_rate": 7.17983211411824e-06, - "loss": 0.6606, + "epoch": 0.25, + "grad_norm": 1.4896632630326605, + "learning_rate": 8.747415033483615e-06, + "loss": 0.5257, "step": 3571 }, { - "epoch": 0.38, - "grad_norm": 3.88759903004812, - "learning_rate": 7.178298236339727e-06, - "loss": 0.5967, + "epoch": 0.25, + "grad_norm": 3.647331880626189, + "learning_rate": 8.746654158883507e-06, + "loss": 0.6105, "step": 3572 }, { - "epoch": 0.38, - "grad_norm": 5.01835950623007, - "learning_rate": 7.176764105492821e-06, - "loss": 0.6285, + "epoch": 0.25, + "grad_norm": 1.6630603768454533, + "learning_rate": 8.745893086372432e-06, + "loss": 0.612, "step": 3573 }, { - "epoch": 0.38, - "grad_norm": 2.7434215149338397, - "learning_rate": 7.175229721755753e-06, - "loss": 0.6626, + "epoch": 0.25, + "grad_norm": 1.7501164151634936, + "learning_rate": 8.745131815990591e-06, + "loss": 0.5057, "step": 3574 }, { - "epoch": 0.38, - "grad_norm": 3.346997092680918, - "learning_rate": 7.173695085306785e-06, - "loss": 0.7167, + "epoch": 0.25, + "grad_norm": 0.8830299607852853, + "learning_rate": 8.744370347778198e-06, + "loss": 0.452, "step": 3575 }, { - "epoch": 0.38, - "grad_norm": 3.140529020266596, - "learning_rate": 7.172160196324205e-06, - "loss": 0.658, + "epoch": 0.25, + "grad_norm": 1.5974128624558803, + "learning_rate": 8.743608681775473e-06, + "loss": 0.5617, "step": 3576 }, { - "epoch": 0.38, - "grad_norm": 4.120165751814182, - "learning_rate": 7.1706250549863335e-06, - "loss": 0.69, + "epoch": 0.25, + "grad_norm": 2.946811896788621, + "learning_rate": 8.742846818022654e-06, + "loss": 0.6471, "step": 3577 }, { - "epoch": 0.38, - "grad_norm": 2.2247914008766796, - "learning_rate": 7.1690896614715155e-06, - "loss": 0.6288, + "epoch": 0.25, + "grad_norm": 1.616861307139559, + "learning_rate": 8.742084756559984e-06, + "loss": 0.5687, "step": 3578 }, { - "epoch": 0.38, - "grad_norm": 2.605629864739022, - "learning_rate": 7.167554015958133e-06, - "loss": 0.6357, + "epoch": 0.25, + "grad_norm": 1.741814282848707, + "learning_rate": 8.741322497427717e-06, + "loss": 0.5762, "step": 3579 }, { - "epoch": 0.38, - "grad_norm": 2.3904906015835947, - "learning_rate": 7.166018118624588e-06, - "loss": 0.5556, + "epoch": 0.25, + "grad_norm": 1.8522648817421365, + "learning_rate": 8.740560040666117e-06, + "loss": 0.5924, "step": 3580 }, { - "epoch": 0.38, - "grad_norm": 2.6192217487542586, - "learning_rate": 7.164481969649323e-06, - "loss": 0.6576, + "epoch": 0.25, + "grad_norm": 1.875710171427392, + "learning_rate": 8.739797386315463e-06, + "loss": 0.4979, "step": 3581 }, { - "epoch": 0.38, - "grad_norm": 5.076216402779878, - "learning_rate": 7.162945569210796e-06, - "loss": 0.6749, + "epoch": 0.25, + "grad_norm": 1.5622426087668504, + "learning_rate": 8.739034534416038e-06, + "loss": 0.5487, "step": 3582 }, { - "epoch": 0.38, - "grad_norm": 4.889072644554949, - "learning_rate": 7.161408917487509e-06, - "loss": 0.6673, + "epoch": 0.25, + "grad_norm": 1.5384162179633796, + "learning_rate": 8.73827148500814e-06, + "loss": 0.579, "step": 3583 }, { - "epoch": 0.38, - "grad_norm": 4.537856847059156, - "learning_rate": 7.159872014657978e-06, - "loss": 0.6752, + "epoch": 0.25, + "grad_norm": 1.7370431979731387, + "learning_rate": 8.737508238132074e-06, + "loss": 0.5223, "step": 3584 }, { - "epoch": 0.38, - "grad_norm": 2.221005642218842, - "learning_rate": 7.158334860900762e-06, - "loss": 0.5711, + "epoch": 0.25, + "grad_norm": 1.874484798726454, + "learning_rate": 8.736744793828158e-06, + "loss": 0.5775, "step": 3585 }, { - "epoch": 0.38, - "grad_norm": 2.4533316517178387, - "learning_rate": 7.156797456394441e-06, - "loss": 0.7351, + "epoch": 0.25, + "grad_norm": 1.7004036356599075, + "learning_rate": 8.735981152136721e-06, + "loss": 0.5382, "step": 3586 }, { - "epoch": 0.38, - "grad_norm": 2.886040834314495, - "learning_rate": 7.1552598013176264e-06, - "loss": 0.6767, + "epoch": 0.25, + "grad_norm": 1.9004536977696118, + "learning_rate": 8.735217313098101e-06, + "loss": 0.481, "step": 3587 }, { - "epoch": 0.38, - "grad_norm": 2.874147523804201, - "learning_rate": 7.1537218958489575e-06, - "loss": 0.6642, + "epoch": 0.25, + "grad_norm": 1.713668624278301, + "learning_rate": 8.734453276752646e-06, + "loss": 0.5715, "step": 3588 }, { - "epoch": 0.38, - "grad_norm": 2.722531139220571, - "learning_rate": 7.152183740167105e-06, - "loss": 0.6678, + "epoch": 0.25, + "grad_norm": 1.6254213474064918, + "learning_rate": 8.733689043140713e-06, + "loss": 0.5364, "step": 3589 }, { - "epoch": 0.38, - "grad_norm": 3.187200392218844, - "learning_rate": 7.150645334450767e-06, - "loss": 0.7528, + "epoch": 0.25, + "grad_norm": 1.784269153006098, + "learning_rate": 8.732924612302675e-06, + "loss": 0.6085, "step": 3590 }, { - "epoch": 0.38, - "grad_norm": 2.3536496010454147, - "learning_rate": 7.14910667887867e-06, - "loss": 0.6902, + "epoch": 0.25, + "grad_norm": 1.6464505079732679, + "learning_rate": 8.732159984278909e-06, + "loss": 0.5949, "step": 3591 }, { - "epoch": 0.38, - "grad_norm": 2.0650748967886057, - "learning_rate": 7.147567773629573e-06, - "loss": 0.6505, + "epoch": 0.25, + "grad_norm": 3.838063209545325, + "learning_rate": 8.731395159109808e-06, + "loss": 0.6504, "step": 3592 }, { - "epoch": 0.38, - "grad_norm": 2.86825048250956, - "learning_rate": 7.146028618882258e-06, - "loss": 0.7501, + "epoch": 0.25, + "grad_norm": 1.57740299795768, + "learning_rate": 8.73063013683577e-06, + "loss": 0.5621, "step": 3593 }, { - "epoch": 0.38, - "grad_norm": 2.5434125470032756, - "learning_rate": 7.1444892148155445e-06, - "loss": 0.7357, + "epoch": 0.26, + "grad_norm": 1.6912789431169566, + "learning_rate": 8.729864917497206e-06, + "loss": 0.5993, "step": 3594 }, { - "epoch": 0.38, - "grad_norm": 2.4145813864328374, - "learning_rate": 7.14294956160827e-06, - "loss": 0.613, + "epoch": 0.26, + "grad_norm": 1.494865070165952, + "learning_rate": 8.729099501134539e-06, + "loss": 0.526, "step": 3595 }, { - "epoch": 0.38, - "grad_norm": 2.141187484264333, - "learning_rate": 7.141409659439313e-06, - "loss": 0.6496, + "epoch": 0.26, + "grad_norm": 1.4821482489130822, + "learning_rate": 8.728333887788201e-06, + "loss": 0.5951, "step": 3596 }, { - "epoch": 0.38, - "grad_norm": 2.9374788957358136, - "learning_rate": 7.139869508487569e-06, - "loss": 0.6382, + "epoch": 0.26, + "grad_norm": 1.0279595466081974, + "learning_rate": 8.727568077498634e-06, + "loss": 0.4724, "step": 3597 }, { - "epoch": 0.38, - "grad_norm": 2.4608281779178722, - "learning_rate": 7.138329108931974e-06, - "loss": 0.6155, + "epoch": 0.26, + "grad_norm": 1.6208396064027573, + "learning_rate": 8.726802070306289e-06, + "loss": 0.5704, "step": 3598 }, { - "epoch": 0.38, - "grad_norm": 2.9810869246330727, - "learning_rate": 7.136788460951482e-06, - "loss": 0.6844, + "epoch": 0.26, + "grad_norm": 1.5428340869398243, + "learning_rate": 8.726035866251632e-06, + "loss": 0.506, "step": 3599 }, { - "epoch": 0.38, - "grad_norm": 2.261119179477242, - "learning_rate": 7.135247564725085e-06, - "loss": 0.6355, + "epoch": 0.26, + "grad_norm": 1.4950042363695166, + "learning_rate": 8.725269465375135e-06, + "loss": 0.5819, "step": 3600 }, { - "epoch": 0.38, - "grad_norm": 2.655641062191612, - "learning_rate": 7.133706420431799e-06, - "loss": 0.578, + "epoch": 0.26, + "grad_norm": 2.2244418817594855, + "learning_rate": 8.724502867717281e-06, + "loss": 0.5982, "step": 3601 }, { - "epoch": 0.38, - "grad_norm": 1.0452192913458778, - "learning_rate": 7.132165028250666e-06, - "loss": 0.6081, + "epoch": 0.26, + "grad_norm": 1.5067785678131795, + "learning_rate": 8.723736073318565e-06, + "loss": 0.5595, "step": 3602 }, { - "epoch": 0.38, - "grad_norm": 2.5869892334643882, - "learning_rate": 7.130623388360767e-06, - "loss": 0.7272, + "epoch": 0.26, + "grad_norm": 1.6553742766615005, + "learning_rate": 8.722969082219494e-06, + "loss": 0.5069, "step": 3603 }, { - "epoch": 0.38, - "grad_norm": 2.7640836198719785, - "learning_rate": 7.129081500941199e-06, - "loss": 0.6978, + "epoch": 0.26, + "grad_norm": 1.9575609355860462, + "learning_rate": 8.722201894460577e-06, + "loss": 0.5878, "step": 3604 }, { - "epoch": 0.38, - "grad_norm": 2.768985249973699, - "learning_rate": 7.127539366171099e-06, - "loss": 0.5961, + "epoch": 0.26, + "grad_norm": 2.506846191681386, + "learning_rate": 8.721434510082347e-06, + "loss": 0.575, "step": 3605 }, { - "epoch": 0.38, - "grad_norm": 3.9567725278063506, - "learning_rate": 7.125996984229623e-06, - "loss": 0.685, + "epoch": 0.26, + "grad_norm": 2.301053292514493, + "learning_rate": 8.720666929125335e-06, + "loss": 0.5878, "step": 3606 }, { - "epoch": 0.38, - "grad_norm": 2.879031446261374, - "learning_rate": 7.124454355295966e-06, - "loss": 0.6856, + "epoch": 0.26, + "grad_norm": 1.9236638537373143, + "learning_rate": 8.719899151630088e-06, + "loss": 0.5708, "step": 3607 }, { - "epoch": 0.38, - "grad_norm": 2.4194323937650752, - "learning_rate": 7.1229114795493405e-06, - "loss": 0.6894, + "epoch": 0.26, + "grad_norm": 1.6333889889102935, + "learning_rate": 8.719131177637165e-06, + "loss": 0.5898, "step": 3608 }, { - "epoch": 0.38, - "grad_norm": 0.9796362738749906, - "learning_rate": 7.121368357168997e-06, - "loss": 0.5798, + "epoch": 0.26, + "grad_norm": 2.124134934066161, + "learning_rate": 8.718363007187129e-06, + "loss": 0.5832, "step": 3609 }, { - "epoch": 0.38, - "grad_norm": 2.51120233981379, - "learning_rate": 7.11982498833421e-06, - "loss": 0.7307, + "epoch": 0.26, + "grad_norm": 1.702871232379821, + "learning_rate": 8.717594640320562e-06, + "loss": 0.5314, "step": 3610 }, { - "epoch": 0.38, - "grad_norm": 0.9959995491929295, - "learning_rate": 7.1182813732242835e-06, - "loss": 0.5706, + "epoch": 0.26, + "grad_norm": 2.7625790736531948, + "learning_rate": 8.716826077078047e-06, + "loss": 0.581, "step": 3611 }, { - "epoch": 0.38, - "grad_norm": 2.693687958001663, - "learning_rate": 7.116737512018551e-06, - "loss": 0.598, + "epoch": 0.26, + "grad_norm": 1.8255145779819335, + "learning_rate": 8.716057317500183e-06, + "loss": 0.5748, "step": 3612 }, { - "epoch": 0.38, - "grad_norm": 2.298264703609383, - "learning_rate": 7.115193404896372e-06, - "loss": 0.6928, + "epoch": 0.26, + "grad_norm": 2.898807726093886, + "learning_rate": 8.715288361627581e-06, + "loss": 0.5606, "step": 3613 }, { - "epoch": 0.38, - "grad_norm": 4.7510712526228716, - "learning_rate": 7.1136490520371394e-06, - "loss": 0.6345, + "epoch": 0.26, + "grad_norm": 1.7745438437899488, + "learning_rate": 8.71451920950086e-06, + "loss": 0.6497, "step": 3614 }, { - "epoch": 0.38, - "grad_norm": 2.545812637717511, - "learning_rate": 7.112104453620269e-06, - "loss": 0.7136, + "epoch": 0.26, + "grad_norm": 2.068276608729876, + "learning_rate": 8.713749861160647e-06, + "loss": 0.5827, "step": 3615 }, { - "epoch": 0.38, - "grad_norm": 2.4425094561499003, - "learning_rate": 7.11055960982521e-06, - "loss": 0.6611, + "epoch": 0.26, + "grad_norm": 2.263145071139074, + "learning_rate": 8.712980316647582e-06, + "loss": 0.6332, "step": 3616 }, { - "epoch": 0.38, - "grad_norm": 3.2838270941484837, - "learning_rate": 7.109014520831433e-06, - "loss": 0.6004, + "epoch": 0.26, + "grad_norm": 1.4428324207610437, + "learning_rate": 8.712210576002316e-06, + "loss": 0.5832, "step": 3617 }, { - "epoch": 0.38, - "grad_norm": 3.6464684533156175, - "learning_rate": 7.10746918681845e-06, - "loss": 0.7205, + "epoch": 0.26, + "grad_norm": 0.8754212296269239, + "learning_rate": 8.711440639265507e-06, + "loss": 0.4773, "step": 3618 }, { - "epoch": 0.38, - "grad_norm": 2.494247980704131, - "learning_rate": 7.105923607965786e-06, - "loss": 0.6274, + "epoch": 0.26, + "grad_norm": 0.8072928209018205, + "learning_rate": 8.710670506477829e-06, + "loss": 0.456, "step": 3619 }, { - "epoch": 0.38, - "grad_norm": 2.4050078671251107, - "learning_rate": 7.104377784453005e-06, - "loss": 0.6664, + "epoch": 0.26, + "grad_norm": 2.108762898757041, + "learning_rate": 8.709900177679961e-06, + "loss": 0.5224, "step": 3620 }, { - "epoch": 0.38, - "grad_norm": 2.404246434896617, - "learning_rate": 7.102831716459696e-06, - "loss": 0.5954, + "epoch": 0.26, + "grad_norm": 0.7143354334254715, + "learning_rate": 8.709129652912595e-06, + "loss": 0.4544, "step": 3621 }, { - "epoch": 0.38, - "grad_norm": 2.623044062785884, - "learning_rate": 7.101285404165478e-06, - "loss": 0.6359, + "epoch": 0.26, + "grad_norm": 1.606053300195936, + "learning_rate": 8.708358932216431e-06, + "loss": 0.5885, "step": 3622 }, { - "epoch": 0.38, - "grad_norm": 2.5546602923700013, - "learning_rate": 7.099738847749995e-06, - "loss": 0.6142, + "epoch": 0.26, + "grad_norm": 1.7759187246092598, + "learning_rate": 8.707588015632184e-06, + "loss": 0.5672, "step": 3623 }, { - "epoch": 0.38, - "grad_norm": 2.5511546021541984, - "learning_rate": 7.098192047392923e-06, - "loss": 0.6326, + "epoch": 0.26, + "grad_norm": 2.63649911595348, + "learning_rate": 8.706816903200576e-06, + "loss": 0.5776, "step": 3624 }, { - "epoch": 0.38, - "grad_norm": 2.863354029549815, - "learning_rate": 7.096645003273964e-06, - "loss": 0.7335, + "epoch": 0.26, + "grad_norm": 0.8646140014532562, + "learning_rate": 8.706045594962338e-06, + "loss": 0.467, "step": 3625 }, { - "epoch": 0.38, - "grad_norm": 2.6001421531585835, - "learning_rate": 7.095097715572849e-06, - "loss": 0.6356, + "epoch": 0.26, + "grad_norm": 2.5040473623783774, + "learning_rate": 8.705274090958213e-06, + "loss": 0.5585, "step": 3626 }, { - "epoch": 0.38, - "grad_norm": 2.6189887318128875, - "learning_rate": 7.093550184469339e-06, - "loss": 0.6734, + "epoch": 0.26, + "grad_norm": 1.7324541301129097, + "learning_rate": 8.704502391228955e-06, + "loss": 0.5534, "step": 3627 }, { - "epoch": 0.38, - "grad_norm": 2.9686532488381214, - "learning_rate": 7.092002410143218e-06, - "loss": 0.6598, + "epoch": 0.26, + "grad_norm": 1.7710468926980585, + "learning_rate": 8.70373049581533e-06, + "loss": 0.584, "step": 3628 }, { - "epoch": 0.38, - "grad_norm": 2.885198141114095, - "learning_rate": 7.0904543927743066e-06, - "loss": 0.7185, + "epoch": 0.26, + "grad_norm": 1.9989333625908405, + "learning_rate": 8.702958404758109e-06, + "loss": 0.5992, "step": 3629 }, { - "epoch": 0.38, - "grad_norm": 2.655688880025789, - "learning_rate": 7.088906132542446e-06, - "loss": 0.6541, + "epoch": 0.26, + "grad_norm": 1.7654461142506783, + "learning_rate": 8.702186118098076e-06, + "loss": 0.5447, "step": 3630 }, { - "epoch": 0.38, - "grad_norm": 2.7287819033410154, - "learning_rate": 7.0873576296275096e-06, - "loss": 0.6549, + "epoch": 0.26, + "grad_norm": 1.7381693940197311, + "learning_rate": 8.701413635876031e-06, + "loss": 0.5715, "step": 3631 }, { - "epoch": 0.38, - "grad_norm": 2.6674782827937946, - "learning_rate": 7.085808884209396e-06, - "loss": 0.5593, + "epoch": 0.26, + "grad_norm": 3.3475359920593384, + "learning_rate": 8.700640958132774e-06, + "loss": 0.5568, "step": 3632 }, { - "epoch": 0.38, - "grad_norm": 2.3031490250344473, - "learning_rate": 7.084259896468038e-06, - "loss": 0.6337, + "epoch": 0.26, + "grad_norm": 1.5845307990415023, + "learning_rate": 8.699868084909122e-06, + "loss": 0.5565, "step": 3633 }, { - "epoch": 0.38, - "grad_norm": 1.243801593410752, - "learning_rate": 7.082710666583389e-06, - "loss": 0.5897, + "epoch": 0.26, + "grad_norm": 2.0047742271623905, + "learning_rate": 8.699095016245901e-06, + "loss": 0.5646, "step": 3634 }, { - "epoch": 0.38, - "grad_norm": 3.3382949550635908, - "learning_rate": 7.081161194735435e-06, - "loss": 0.7044, + "epoch": 0.26, + "grad_norm": 0.7925378875225985, + "learning_rate": 8.698321752183946e-06, + "loss": 0.4613, "step": 3635 }, { - "epoch": 0.38, - "grad_norm": 3.2631769374260293, - "learning_rate": 7.0796114811041905e-06, - "loss": 0.6655, + "epoch": 0.26, + "grad_norm": 1.7921994629169096, + "learning_rate": 8.697548292764106e-06, + "loss": 0.5864, "step": 3636 }, { - "epoch": 0.38, - "grad_norm": 1.1036281150291933, - "learning_rate": 7.078061525869695e-06, - "loss": 0.6465, + "epoch": 0.26, + "grad_norm": 1.7084199426081939, + "learning_rate": 8.696774638027237e-06, + "loss": 0.4903, "step": 3637 }, { - "epoch": 0.38, - "grad_norm": 2.7686792433647867, - "learning_rate": 7.076511329212019e-06, - "loss": 0.674, + "epoch": 0.26, + "grad_norm": 0.9043494285887166, + "learning_rate": 8.696000788014203e-06, + "loss": 0.4651, "step": 3638 }, { - "epoch": 0.38, - "grad_norm": 3.7550739067072625, - "learning_rate": 7.074960891311258e-06, - "loss": 0.6734, + "epoch": 0.26, + "grad_norm": 1.7604120874343454, + "learning_rate": 8.695226742765886e-06, + "loss": 0.6693, "step": 3639 }, { - "epoch": 0.38, - "grad_norm": 2.177433134571054, - "learning_rate": 7.073410212347541e-06, - "loss": 0.6277, + "epoch": 0.26, + "grad_norm": 2.189263168230681, + "learning_rate": 8.69445250232317e-06, + "loss": 0.5724, "step": 3640 }, { - "epoch": 0.38, - "grad_norm": 2.7882879054667487, - "learning_rate": 7.071859292501018e-06, - "loss": 0.6493, + "epoch": 0.26, + "grad_norm": 1.7532587805774091, + "learning_rate": 8.693678066726954e-06, + "loss": 0.5295, "step": 3641 }, { - "epoch": 0.38, - "grad_norm": 3.434366748004624, - "learning_rate": 7.070308131951872e-06, - "loss": 0.6723, + "epoch": 0.26, + "grad_norm": 1.587597835912209, + "learning_rate": 8.692903436018146e-06, + "loss": 0.5891, "step": 3642 }, { - "epoch": 0.38, - "grad_norm": 2.7276152477544935, - "learning_rate": 7.068756730880311e-06, - "loss": 0.622, + "epoch": 0.26, + "grad_norm": 1.610792688572172, + "learning_rate": 8.692128610237666e-06, + "loss": 0.6089, "step": 3643 }, { - "epoch": 0.38, - "grad_norm": 3.0417680257370083, - "learning_rate": 7.067205089466574e-06, - "loss": 0.6209, + "epoch": 0.26, + "grad_norm": 1.8678905178735024, + "learning_rate": 8.691353589426443e-06, + "loss": 0.5845, "step": 3644 }, { - "epoch": 0.38, - "grad_norm": 2.321254791228394, - "learning_rate": 7.065653207890924e-06, - "loss": 0.6661, + "epoch": 0.26, + "grad_norm": 3.2330473809015503, + "learning_rate": 8.690578373625414e-06, + "loss": 0.5595, "step": 3645 }, { - "epoch": 0.38, - "grad_norm": 2.8612826916827196, - "learning_rate": 7.064101086333657e-06, - "loss": 0.6949, + "epoch": 0.26, + "grad_norm": 2.218967301274481, + "learning_rate": 8.68980296287553e-06, + "loss": 0.5914, "step": 3646 }, { - "epoch": 0.38, - "grad_norm": 3.1520069324380366, - "learning_rate": 7.0625487249750915e-06, - "loss": 0.6501, + "epoch": 0.26, + "grad_norm": 1.7549158855386653, + "learning_rate": 8.689027357217751e-06, + "loss": 0.5471, "step": 3647 }, { - "epoch": 0.38, - "grad_norm": 3.964198431485837, - "learning_rate": 7.060996123995576e-06, - "loss": 0.6407, + "epoch": 0.26, + "grad_norm": 1.468988440212857, + "learning_rate": 8.688251556693049e-06, + "loss": 0.5992, "step": 3648 }, { - "epoch": 0.38, - "grad_norm": 2.93236095260122, - "learning_rate": 7.059443283575492e-06, - "loss": 0.5939, + "epoch": 0.26, + "grad_norm": 1.9179764601407747, + "learning_rate": 8.6874755613424e-06, + "loss": 0.5192, "step": 3649 }, { - "epoch": 0.38, - "grad_norm": 2.6552920819656887, - "learning_rate": 7.0578902038952375e-06, - "loss": 0.6932, + "epoch": 0.26, + "grad_norm": 1.8650251537227889, + "learning_rate": 8.686699371206795e-06, + "loss": 0.5759, "step": 3650 }, { - "epoch": 0.38, - "grad_norm": 2.9297734442881906, - "learning_rate": 7.056336885135251e-06, - "loss": 0.6419, + "epoch": 0.26, + "grad_norm": 1.8132813440249933, + "learning_rate": 8.68592298632724e-06, + "loss": 0.6232, "step": 3651 }, { - "epoch": 0.38, - "grad_norm": 2.6224581309833943, - "learning_rate": 7.054783327475987e-06, - "loss": 0.675, + "epoch": 0.26, + "grad_norm": 1.9072099590090135, + "learning_rate": 8.685146406744744e-06, + "loss": 0.5648, "step": 3652 }, { - "epoch": 0.38, - "grad_norm": 2.7160780202997263, - "learning_rate": 7.053229531097937e-06, - "loss": 0.656, + "epoch": 0.26, + "grad_norm": 1.8352834947803371, + "learning_rate": 8.684369632500326e-06, + "loss": 0.5583, "step": 3653 }, { - "epoch": 0.38, - "grad_norm": 3.3774431250824373, - "learning_rate": 7.051675496181614e-06, - "loss": 0.5999, + "epoch": 0.26, + "grad_norm": 1.9478162998984117, + "learning_rate": 8.68359266363502e-06, + "loss": 0.6115, "step": 3654 }, { - "epoch": 0.38, - "grad_norm": 1.2783376063724854, - "learning_rate": 7.050121222907564e-06, - "loss": 0.5744, + "epoch": 0.26, + "grad_norm": 1.4721815213799276, + "learning_rate": 8.682815500189868e-06, + "loss": 0.5679, "step": 3655 }, { - "epoch": 0.38, - "grad_norm": 2.3589367318753514, - "learning_rate": 7.048566711456355e-06, - "loss": 0.6371, + "epoch": 0.26, + "grad_norm": 2.400695922871463, + "learning_rate": 8.682038142205922e-06, + "loss": 0.5429, "step": 3656 }, { - "epoch": 0.38, - "grad_norm": 2.6774468417048696, - "learning_rate": 7.047011962008589e-06, - "loss": 0.6447, + "epoch": 0.26, + "grad_norm": 1.8518293260502567, + "learning_rate": 8.681260589724245e-06, + "loss": 0.5488, "step": 3657 }, { - "epoch": 0.38, - "grad_norm": 2.4342842413375734, - "learning_rate": 7.04545697474489e-06, - "loss": 0.6903, + "epoch": 0.26, + "grad_norm": 1.575391248252196, + "learning_rate": 8.680482842785909e-06, + "loss": 0.5391, "step": 3658 }, { - "epoch": 0.39, - "grad_norm": 2.252872706576466, - "learning_rate": 7.043901749845913e-06, - "loss": 0.6952, + "epoch": 0.26, + "grad_norm": 1.4891242458232472, + "learning_rate": 8.679704901432e-06, + "loss": 0.4822, "step": 3659 }, { - "epoch": 0.39, - "grad_norm": 2.3175374076123303, - "learning_rate": 7.042346287492339e-06, - "loss": 0.708, + "epoch": 0.26, + "grad_norm": 1.644929364259764, + "learning_rate": 8.678926765703609e-06, + "loss": 0.5722, "step": 3660 }, { - "epoch": 0.39, - "grad_norm": 2.634929578368015, - "learning_rate": 7.040790587864875e-06, - "loss": 0.6424, + "epoch": 0.26, + "grad_norm": 2.1478295783341457, + "learning_rate": 8.678148435641837e-06, + "loss": 0.558, "step": 3661 }, { - "epoch": 0.39, - "grad_norm": 2.618636727946655, - "learning_rate": 7.039234651144262e-06, - "loss": 0.6591, + "epoch": 0.26, + "grad_norm": 2.939418933932354, + "learning_rate": 8.677369911287806e-06, + "loss": 0.615, "step": 3662 }, { - "epoch": 0.39, - "grad_norm": 2.904861681615746, - "learning_rate": 7.037678477511261e-06, - "loss": 0.7491, + "epoch": 0.26, + "grad_norm": 0.7629138496993778, + "learning_rate": 8.676591192682633e-06, + "loss": 0.4562, "step": 3663 }, { - "epoch": 0.39, - "grad_norm": 2.5649713303543047, - "learning_rate": 7.036122067146667e-06, - "loss": 0.703, + "epoch": 0.26, + "grad_norm": 1.7632948497629903, + "learning_rate": 8.675812279867457e-06, + "loss": 0.5416, "step": 3664 }, { - "epoch": 0.39, - "grad_norm": 3.220337768555499, - "learning_rate": 7.034565420231294e-06, - "loss": 0.7034, + "epoch": 0.26, + "grad_norm": 2.0229309801397855, + "learning_rate": 8.675033172883419e-06, + "loss": 0.5525, "step": 3665 }, { - "epoch": 0.39, - "grad_norm": 2.390572089955541, - "learning_rate": 7.033008536945994e-06, - "loss": 0.6504, + "epoch": 0.26, + "grad_norm": 1.7646636554629866, + "learning_rate": 8.674253871771677e-06, + "loss": 0.6159, "step": 3666 }, { - "epoch": 0.39, - "grad_norm": 3.7954821263514296, - "learning_rate": 7.031451417471638e-06, - "loss": 0.6958, + "epoch": 0.26, + "grad_norm": 1.8909733644854134, + "learning_rate": 8.673474376573396e-06, + "loss": 0.6238, "step": 3667 }, { - "epoch": 0.39, - "grad_norm": 2.8545469996532997, - "learning_rate": 7.029894061989128e-06, - "loss": 0.7058, + "epoch": 0.26, + "grad_norm": 3.8954919363219718, + "learning_rate": 8.67269468732975e-06, + "loss": 0.5375, "step": 3668 }, { - "epoch": 0.39, - "grad_norm": 2.741678996189431, - "learning_rate": 7.0283364706793954e-06, - "loss": 0.6367, + "epoch": 0.26, + "grad_norm": 1.8170956115543786, + "learning_rate": 8.671914804081927e-06, + "loss": 0.5076, "step": 3669 }, { - "epoch": 0.39, - "grad_norm": 2.677654834887371, - "learning_rate": 7.026778643723393e-06, - "loss": 0.6911, + "epoch": 0.26, + "grad_norm": 1.3942200401772238, + "learning_rate": 8.67113472687112e-06, + "loss": 0.4806, "step": 3670 }, { - "epoch": 0.39, - "grad_norm": 2.061593434569085, - "learning_rate": 7.025220581302108e-06, - "loss": 0.6083, + "epoch": 0.26, + "grad_norm": 1.7816022807699283, + "learning_rate": 8.670354455738539e-06, + "loss": 0.5944, "step": 3671 }, { - "epoch": 0.39, - "grad_norm": 4.4271014763245535, - "learning_rate": 7.02366228359655e-06, - "loss": 0.632, + "epoch": 0.26, + "grad_norm": 1.6316943758406213, + "learning_rate": 8.669573990725399e-06, + "loss": 0.5101, "step": 3672 }, { - "epoch": 0.39, - "grad_norm": 8.875367995763348, - "learning_rate": 7.022103750787759e-06, - "loss": 0.7067, + "epoch": 0.26, + "grad_norm": 1.4967197781464874, + "learning_rate": 8.668793331872925e-06, + "loss": 0.5144, "step": 3673 }, { - "epoch": 0.39, - "grad_norm": 2.7873572210988407, - "learning_rate": 7.020544983056796e-06, - "loss": 0.7194, + "epoch": 0.26, + "grad_norm": 0.8310118763729321, + "learning_rate": 8.668012479222356e-06, + "loss": 0.4676, "step": 3674 }, { - "epoch": 0.39, - "grad_norm": 2.662020767735063, - "learning_rate": 7.0189859805847615e-06, - "loss": 0.6952, + "epoch": 0.26, + "grad_norm": 2.1099151954110615, + "learning_rate": 8.66723143281494e-06, + "loss": 0.5629, "step": 3675 }, { - "epoch": 0.39, - "grad_norm": 3.8783628852915077, - "learning_rate": 7.017426743552769e-06, - "loss": 0.6343, + "epoch": 0.26, + "grad_norm": 1.9145896579019952, + "learning_rate": 8.666450192691932e-06, + "loss": 0.554, "step": 3676 }, { - "epoch": 0.39, - "grad_norm": 3.5280735743445337, - "learning_rate": 7.015867272141972e-06, - "loss": 0.6424, + "epoch": 0.26, + "grad_norm": 1.7022551172668052, + "learning_rate": 8.665668758894603e-06, + "loss": 0.6488, "step": 3677 }, { - "epoch": 0.39, - "grad_norm": 2.457030504295436, - "learning_rate": 7.014307566533541e-06, - "loss": 0.6985, + "epoch": 0.26, + "grad_norm": 0.7522524061469686, + "learning_rate": 8.664887131464228e-06, + "loss": 0.466, "step": 3678 }, { - "epoch": 0.39, - "grad_norm": 2.102622932285392, - "learning_rate": 7.0127476269086796e-06, - "loss": 0.6618, + "epoch": 0.26, + "grad_norm": 1.810764119789444, + "learning_rate": 8.664105310442098e-06, + "loss": 0.5543, "step": 3679 }, { - "epoch": 0.39, - "grad_norm": 2.953298825950946, - "learning_rate": 7.011187453448617e-06, - "loss": 0.6986, + "epoch": 0.26, + "grad_norm": 2.267152642938336, + "learning_rate": 8.663323295869506e-06, + "loss": 0.5938, "step": 3680 }, { - "epoch": 0.39, - "grad_norm": 2.2057102088439478, - "learning_rate": 7.009627046334611e-06, - "loss": 0.7022, + "epoch": 0.26, + "grad_norm": 1.8566586317689397, + "learning_rate": 8.662541087787768e-06, + "loss": 0.6377, "step": 3681 }, { - "epoch": 0.39, - "grad_norm": 2.414332975549559, - "learning_rate": 7.008066405747943e-06, - "loss": 0.6429, + "epoch": 0.26, + "grad_norm": 1.7522319053649662, + "learning_rate": 8.661758686238198e-06, + "loss": 0.5816, "step": 3682 }, { - "epoch": 0.39, - "grad_norm": 2.484038423521051, - "learning_rate": 7.006505531869925e-06, - "loss": 0.7395, + "epoch": 0.26, + "grad_norm": 1.5241864534516252, + "learning_rate": 8.660976091262127e-06, + "loss": 0.5115, "step": 3683 }, { - "epoch": 0.39, - "grad_norm": 2.441293974717065, - "learning_rate": 7.004944424881894e-06, - "loss": 0.6179, + "epoch": 0.26, + "grad_norm": 1.6348008722108804, + "learning_rate": 8.660193302900893e-06, + "loss": 0.5689, "step": 3684 }, { - "epoch": 0.39, - "grad_norm": 2.12476887533077, - "learning_rate": 7.003383084965215e-06, - "loss": 0.5975, + "epoch": 0.26, + "grad_norm": 1.689777579743413, + "learning_rate": 8.659410321195847e-06, + "loss": 0.5593, "step": 3685 }, { - "epoch": 0.39, - "grad_norm": 2.4875957020716335, - "learning_rate": 7.001821512301283e-06, - "loss": 0.701, + "epoch": 0.26, + "grad_norm": 0.7596973400392164, + "learning_rate": 8.658627146188348e-06, + "loss": 0.4471, "step": 3686 }, { - "epoch": 0.39, - "grad_norm": 2.2622990175164963, - "learning_rate": 7.000259707071512e-06, - "loss": 0.7091, + "epoch": 0.26, + "grad_norm": 1.5181636866406203, + "learning_rate": 8.657843777919766e-06, + "loss": 0.4999, "step": 3687 }, { - "epoch": 0.39, - "grad_norm": 3.4195893855491506, - "learning_rate": 6.9986976694573515e-06, - "loss": 0.6841, + "epoch": 0.26, + "grad_norm": 1.7748232170481197, + "learning_rate": 8.657060216431481e-06, + "loss": 0.5776, "step": 3688 }, { - "epoch": 0.39, - "grad_norm": 3.6453885611711816, - "learning_rate": 6.997135399640273e-06, - "loss": 0.6114, + "epoch": 0.26, + "grad_norm": 1.9460874758566298, + "learning_rate": 8.656276461764883e-06, + "loss": 0.5621, "step": 3689 }, { - "epoch": 0.39, - "grad_norm": 2.129611856788084, - "learning_rate": 6.9955728978017775e-06, - "loss": 0.6429, + "epoch": 0.26, + "grad_norm": 1.9392380135897436, + "learning_rate": 8.655492513961375e-06, + "loss": 0.6248, "step": 3690 }, { - "epoch": 0.39, - "grad_norm": 2.2384526562197875, - "learning_rate": 6.99401016412339e-06, - "loss": 0.7059, + "epoch": 0.26, + "grad_norm": 1.515636531192478, + "learning_rate": 8.654708373062364e-06, + "loss": 0.6002, "step": 3691 }, { - "epoch": 0.39, - "grad_norm": 2.206543127801006, - "learning_rate": 6.992447198786666e-06, - "loss": 0.6889, + "epoch": 0.26, + "grad_norm": 1.6957115793116486, + "learning_rate": 8.653924039109274e-06, + "loss": 0.6129, "step": 3692 }, { - "epoch": 0.39, - "grad_norm": 2.5820156224344046, - "learning_rate": 6.990884001973187e-06, - "loss": 0.5918, + "epoch": 0.26, + "grad_norm": 1.5687243412230785, + "learning_rate": 8.653139512143534e-06, + "loss": 0.5475, "step": 3693 }, { - "epoch": 0.39, - "grad_norm": 2.705800904753045, - "learning_rate": 6.98932057386456e-06, - "loss": 0.6739, + "epoch": 0.26, + "grad_norm": 3.2031730074120555, + "learning_rate": 8.652354792206588e-06, + "loss": 0.5749, "step": 3694 }, { - "epoch": 0.39, - "grad_norm": 3.2287782223395998, - "learning_rate": 6.987756914642418e-06, - "loss": 0.5849, + "epoch": 0.26, + "grad_norm": 1.6852168019189258, + "learning_rate": 8.651569879339886e-06, + "loss": 0.4797, "step": 3695 }, { - "epoch": 0.39, - "grad_norm": 3.4820384692081654, - "learning_rate": 6.986193024488423e-06, - "loss": 0.6841, + "epoch": 0.26, + "grad_norm": 1.533342261614092, + "learning_rate": 8.650784773584887e-06, + "loss": 0.4829, "step": 3696 }, { - "epoch": 0.39, - "grad_norm": 2.90798808346997, - "learning_rate": 6.984628903584266e-06, - "loss": 0.6208, + "epoch": 0.26, + "grad_norm": 1.676613107619779, + "learning_rate": 8.649999474983069e-06, + "loss": 0.4745, "step": 3697 }, { - "epoch": 0.39, - "grad_norm": 5.045138928005131, - "learning_rate": 6.983064552111658e-06, - "loss": 0.6761, + "epoch": 0.26, + "grad_norm": 1.7145646575924403, + "learning_rate": 8.649213983575908e-06, + "loss": 0.5789, "step": 3698 }, { - "epoch": 0.39, - "grad_norm": 2.763765878384353, - "learning_rate": 6.981499970252345e-06, - "loss": 0.6429, + "epoch": 0.26, + "grad_norm": 1.7776950673731338, + "learning_rate": 8.6484282994049e-06, + "loss": 0.569, "step": 3699 }, { - "epoch": 0.39, - "grad_norm": 3.8969396881884393, - "learning_rate": 6.979935158188091e-06, - "loss": 0.6543, + "epoch": 0.26, + "grad_norm": 1.9670540542979742, + "learning_rate": 8.647642422511549e-06, + "loss": 0.5301, "step": 3700 }, { - "epoch": 0.39, - "grad_norm": 2.144538256745873, - "learning_rate": 6.9783701161006965e-06, - "loss": 0.6848, + "epoch": 0.26, + "grad_norm": 0.9357515197082985, + "learning_rate": 8.646856352937363e-06, + "loss": 0.4782, "step": 3701 }, { - "epoch": 0.39, - "grad_norm": 2.2691334424929632, - "learning_rate": 6.976804844171978e-06, - "loss": 0.682, + "epoch": 0.26, + "grad_norm": 1.7962593882509972, + "learning_rate": 8.646070090723867e-06, + "loss": 0.5399, "step": 3702 }, { - "epoch": 0.39, - "grad_norm": 2.853203277677684, - "learning_rate": 6.975239342583789e-06, - "loss": 0.5798, + "epoch": 0.26, + "grad_norm": 1.9352727051643879, + "learning_rate": 8.645283635912596e-06, + "loss": 0.5246, "step": 3703 }, { - "epoch": 0.39, - "grad_norm": 0.984156618342594, - "learning_rate": 6.973673611518003e-06, - "loss": 0.6253, + "epoch": 0.26, + "grad_norm": 1.5605131436219404, + "learning_rate": 8.64449698854509e-06, + "loss": 0.605, "step": 3704 }, { - "epoch": 0.39, - "grad_norm": 3.6403380840996964, - "learning_rate": 6.972107651156521e-06, - "loss": 0.7099, + "epoch": 0.26, + "grad_norm": 1.9971586410050854, + "learning_rate": 8.643710148662906e-06, + "loss": 0.5997, "step": 3705 }, { - "epoch": 0.39, - "grad_norm": 3.0398511893391715, - "learning_rate": 6.970541461681274e-06, - "loss": 0.6499, + "epoch": 0.26, + "grad_norm": 0.8047786173159486, + "learning_rate": 8.642923116307603e-06, + "loss": 0.465, "step": 3706 }, { - "epoch": 0.39, - "grad_norm": 2.5614405274937453, - "learning_rate": 6.968975043274215e-06, - "loss": 0.5853, + "epoch": 0.26, + "grad_norm": 1.5070847680522728, + "learning_rate": 8.642135891520758e-06, + "loss": 0.5744, "step": 3707 }, { - "epoch": 0.39, - "grad_norm": 2.34052750735692, - "learning_rate": 6.9674083961173276e-06, - "loss": 0.7104, + "epoch": 0.26, + "grad_norm": 2.0518531271373974, + "learning_rate": 8.641348474343952e-06, + "loss": 0.5554, "step": 3708 }, { - "epoch": 0.39, - "grad_norm": 2.5089454252540015, - "learning_rate": 6.96584152039262e-06, - "loss": 0.6719, + "epoch": 0.26, + "grad_norm": 1.7760824430389066, + "learning_rate": 8.640560864818783e-06, + "loss": 0.5397, "step": 3709 }, { - "epoch": 0.39, - "grad_norm": 2.9541879261553783, - "learning_rate": 6.964274416282129e-06, - "loss": 0.6482, + "epoch": 0.26, + "grad_norm": 1.6663242788838637, + "learning_rate": 8.639773062986853e-06, + "loss": 0.5603, "step": 3710 }, { - "epoch": 0.39, - "grad_norm": 2.3885655104600265, - "learning_rate": 6.962707083967911e-06, - "loss": 0.5981, + "epoch": 0.26, + "grad_norm": 1.742370999078878, + "learning_rate": 8.638985068889776e-06, + "loss": 0.595, "step": 3711 }, { - "epoch": 0.39, - "grad_norm": 2.4321001454354527, - "learning_rate": 6.9611395236320615e-06, - "loss": 0.6967, + "epoch": 0.26, + "grad_norm": 1.5722380329580556, + "learning_rate": 8.638196882569178e-06, + "loss": 0.5317, "step": 3712 }, { - "epoch": 0.39, - "grad_norm": 1.1448344326564757, - "learning_rate": 6.959571735456687e-06, - "loss": 0.6432, + "epoch": 0.26, + "grad_norm": 1.8662209054568732, + "learning_rate": 8.637408504066693e-06, + "loss": 0.6059, "step": 3713 }, { - "epoch": 0.39, - "grad_norm": 2.652298799399858, - "learning_rate": 6.958003719623936e-06, - "loss": 0.7021, + "epoch": 0.26, + "grad_norm": 1.7239787585650452, + "learning_rate": 8.636619933423964e-06, + "loss": 0.5829, "step": 3714 }, { - "epoch": 0.39, - "grad_norm": 2.2558034126837194, - "learning_rate": 6.956435476315972e-06, - "loss": 0.7334, + "epoch": 0.26, + "grad_norm": 1.785041372854532, + "learning_rate": 8.635831170682649e-06, + "loss": 0.5839, "step": 3715 }, { - "epoch": 0.39, - "grad_norm": 2.089987806720731, - "learning_rate": 6.9548670057149896e-06, - "loss": 0.6904, + "epoch": 0.26, + "grad_norm": 1.6555696111711897, + "learning_rate": 8.63504221588441e-06, + "loss": 0.6736, "step": 3716 }, { - "epoch": 0.39, - "grad_norm": 3.0275655323101884, - "learning_rate": 6.953298308003209e-06, - "loss": 0.6388, + "epoch": 0.26, + "grad_norm": 1.8443604690101627, + "learning_rate": 8.634253069070927e-06, + "loss": 0.6179, "step": 3717 }, { - "epoch": 0.39, - "grad_norm": 2.6883335061425595, - "learning_rate": 6.9517293833628785e-06, - "loss": 0.666, + "epoch": 0.26, + "grad_norm": 1.9807881187593537, + "learning_rate": 8.63346373028388e-06, + "loss": 0.5606, "step": 3718 }, { - "epoch": 0.39, - "grad_norm": 4.102080658442691, - "learning_rate": 6.950160231976269e-06, - "loss": 0.6471, + "epoch": 0.26, + "grad_norm": 1.559039335188096, + "learning_rate": 8.632674199564967e-06, + "loss": 0.5693, "step": 3719 }, { - "epoch": 0.39, - "grad_norm": 2.547523830477747, - "learning_rate": 6.948590854025681e-06, - "loss": 0.6384, + "epoch": 0.26, + "grad_norm": 1.779293958552457, + "learning_rate": 8.631884476955895e-06, + "loss": 0.6245, "step": 3720 }, { - "epoch": 0.39, - "grad_norm": 2.9940684741992807, - "learning_rate": 6.947021249693442e-06, - "loss": 0.6313, + "epoch": 0.26, + "grad_norm": 1.8713892967362848, + "learning_rate": 8.631094562498376e-06, + "loss": 0.585, "step": 3721 }, { - "epoch": 0.39, - "grad_norm": 3.388166051557115, - "learning_rate": 6.9454514191619e-06, - "loss": 0.6452, + "epoch": 0.26, + "grad_norm": 1.7305979709166854, + "learning_rate": 8.630304456234141e-06, + "loss": 0.5321, "step": 3722 }, { - "epoch": 0.39, - "grad_norm": 2.5275961581874453, - "learning_rate": 6.9438813626134395e-06, - "loss": 0.5957, + "epoch": 0.26, + "grad_norm": 0.8083812027382703, + "learning_rate": 8.629514158204922e-06, + "loss": 0.4542, "step": 3723 }, { - "epoch": 0.39, - "grad_norm": 2.9456477007058033, - "learning_rate": 6.942311080230458e-06, - "loss": 0.6231, + "epoch": 0.26, + "grad_norm": 1.6812901600536156, + "learning_rate": 8.628723668452466e-06, + "loss": 0.57, "step": 3724 }, { - "epoch": 0.39, - "grad_norm": 2.085258818767328, - "learning_rate": 6.940740572195392e-06, - "loss": 0.6739, + "epoch": 0.26, + "grad_norm": 1.4465288763097162, + "learning_rate": 8.627932987018532e-06, + "loss": 0.5148, "step": 3725 }, { - "epoch": 0.39, - "grad_norm": 2.189417657638878, - "learning_rate": 6.939169838690695e-06, - "loss": 0.6297, + "epoch": 0.26, + "grad_norm": 1.5780342521316664, + "learning_rate": 8.627142113944884e-06, + "loss": 0.5373, "step": 3726 }, { - "epoch": 0.39, - "grad_norm": 2.502191462085905, - "learning_rate": 6.937598879898853e-06, - "loss": 0.6238, + "epoch": 0.26, + "grad_norm": 1.7767833381512546, + "learning_rate": 8.626351049273298e-06, + "loss": 0.568, "step": 3727 }, { - "epoch": 0.39, - "grad_norm": 2.4771620597343773, - "learning_rate": 6.936027696002373e-06, - "loss": 0.7406, + "epoch": 0.26, + "grad_norm": 1.623595698510962, + "learning_rate": 8.625559793045563e-06, + "loss": 0.5395, "step": 3728 }, { - "epoch": 0.39, - "grad_norm": 2.3821967150760526, - "learning_rate": 6.934456287183793e-06, - "loss": 0.7131, + "epoch": 0.26, + "grad_norm": 1.748915763495258, + "learning_rate": 8.624768345303474e-06, + "loss": 0.6349, "step": 3729 }, { - "epoch": 0.39, - "grad_norm": 2.537080782846546, - "learning_rate": 6.932884653625672e-06, - "loss": 0.587, + "epoch": 0.26, + "grad_norm": 1.6572425768247014, + "learning_rate": 8.623976706088838e-06, + "loss": 0.5697, "step": 3730 }, { - "epoch": 0.39, - "grad_norm": 4.044830093724147, - "learning_rate": 6.931312795510601e-06, - "loss": 0.6819, + "epoch": 0.26, + "grad_norm": 1.612014682458433, + "learning_rate": 8.623184875443473e-06, + "loss": 0.6026, "step": 3731 }, { - "epoch": 0.39, - "grad_norm": 2.060390451396102, - "learning_rate": 6.929740713021192e-06, - "loss": 0.6625, + "epoch": 0.26, + "grad_norm": 2.0850073105829607, + "learning_rate": 8.622392853409207e-06, + "loss": 0.6395, "step": 3732 }, { - "epoch": 0.39, - "grad_norm": 2.1547981802320297, - "learning_rate": 6.928168406340082e-06, - "loss": 0.7245, + "epoch": 0.26, + "grad_norm": 2.0968799923538044, + "learning_rate": 8.621600640027875e-06, + "loss": 0.6327, "step": 3733 }, { - "epoch": 0.39, - "grad_norm": 2.526368800735357, - "learning_rate": 6.926595875649944e-06, - "loss": 0.6918, + "epoch": 0.26, + "grad_norm": 2.2329802984870213, + "learning_rate": 8.620808235341327e-06, + "loss": 0.6201, "step": 3734 }, { - "epoch": 0.39, - "grad_norm": 2.751346484300469, - "learning_rate": 6.925023121133465e-06, - "loss": 0.5705, + "epoch": 0.27, + "grad_norm": 1.9200571085684917, + "learning_rate": 8.620015639391417e-06, + "loss": 0.5581, "step": 3735 }, { - "epoch": 0.39, - "grad_norm": 3.5756950930937323, - "learning_rate": 6.923450142973366e-06, - "loss": 0.7081, + "epoch": 0.27, + "grad_norm": 1.5383918939692969, + "learning_rate": 8.619222852220017e-06, + "loss": 0.5391, "step": 3736 }, { - "epoch": 0.39, - "grad_norm": 2.3081997699324925, - "learning_rate": 6.921876941352388e-06, - "loss": 0.6519, + "epoch": 0.27, + "grad_norm": 0.8706093720026479, + "learning_rate": 8.618429873869e-06, + "loss": 0.496, "step": 3737 }, { - "epoch": 0.39, - "grad_norm": 3.6782336966893183, - "learning_rate": 6.920303516453302e-06, - "loss": 0.6644, + "epoch": 0.27, + "grad_norm": 1.5157151226622232, + "learning_rate": 8.617636704380258e-06, + "loss": 0.5385, "step": 3738 }, { - "epoch": 0.39, - "grad_norm": 2.1786958735173174, - "learning_rate": 6.918729868458905e-06, - "loss": 0.6086, + "epoch": 0.27, + "grad_norm": 2.0404820421631547, + "learning_rate": 8.616843343795687e-06, + "loss": 0.5784, "step": 3739 }, { - "epoch": 0.39, - "grad_norm": 1.1080710822626574, - "learning_rate": 6.91715599755202e-06, - "loss": 0.5619, + "epoch": 0.27, + "grad_norm": 1.6806312285494946, + "learning_rate": 8.616049792157194e-06, + "loss": 0.5993, "step": 3740 }, { - "epoch": 0.39, - "grad_norm": 3.3882194917466837, - "learning_rate": 6.9155819039154914e-06, - "loss": 0.6884, + "epoch": 0.27, + "grad_norm": 1.7661721927753113, + "learning_rate": 8.615256049506697e-06, + "loss": 0.5449, "step": 3741 }, { - "epoch": 0.39, - "grad_norm": 2.2474420615290263, - "learning_rate": 6.9140075877321955e-06, - "loss": 0.7152, + "epoch": 0.27, + "grad_norm": 2.013088543803452, + "learning_rate": 8.614462115886126e-06, + "loss": 0.6335, "step": 3742 }, { - "epoch": 0.39, - "grad_norm": 2.3178490077460467, - "learning_rate": 6.91243304918503e-06, - "loss": 0.7279, + "epoch": 0.27, + "grad_norm": 1.9803522921385788, + "learning_rate": 8.61366799133742e-06, + "loss": 0.5432, "step": 3743 }, { - "epoch": 0.39, - "grad_norm": 2.915704362648865, - "learning_rate": 6.9108582884569206e-06, - "loss": 0.6947, + "epoch": 0.27, + "grad_norm": 1.6518134171212095, + "learning_rate": 8.612873675902525e-06, + "loss": 0.5367, "step": 3744 }, { - "epoch": 0.39, - "grad_norm": 2.4243198642597314, - "learning_rate": 6.909283305730822e-06, - "loss": 0.7129, + "epoch": 0.27, + "grad_norm": 0.787514026746798, + "learning_rate": 8.6120791696234e-06, + "loss": 0.4361, "step": 3745 }, { - "epoch": 0.39, - "grad_norm": 1.961387262313486, - "learning_rate": 6.907708101189705e-06, - "loss": 0.7032, + "epoch": 0.27, + "grad_norm": 1.8640576093607606, + "learning_rate": 8.611284472542015e-06, + "loss": 0.4731, "step": 3746 }, { - "epoch": 0.39, - "grad_norm": 2.7889776855961554, - "learning_rate": 6.906132675016577e-06, - "loss": 0.5773, + "epoch": 0.27, + "grad_norm": 1.8982704006669104, + "learning_rate": 8.610489584700348e-06, + "loss": 0.5312, "step": 3747 }, { - "epoch": 0.39, - "grad_norm": 4.106825781647384, - "learning_rate": 6.904557027394464e-06, - "loss": 0.5643, + "epoch": 0.27, + "grad_norm": 1.8518180494040857, + "learning_rate": 8.609694506140384e-06, + "loss": 0.4953, "step": 3748 }, { - "epoch": 0.39, - "grad_norm": 4.128759223475058, - "learning_rate": 6.902981158506421e-06, - "loss": 0.6652, + "epoch": 0.27, + "grad_norm": 0.8221609795065573, + "learning_rate": 8.608899236904128e-06, + "loss": 0.4396, "step": 3749 }, { - "epoch": 0.39, - "grad_norm": 1.0212995877258428, - "learning_rate": 6.90140506853553e-06, - "loss": 0.5981, + "epoch": 0.27, + "grad_norm": 2.136103030021321, + "learning_rate": 8.608103777033585e-06, + "loss": 0.506, "step": 3750 }, { - "epoch": 0.39, - "grad_norm": 2.410233497856306, - "learning_rate": 6.899828757664892e-06, - "loss": 0.6367, + "epoch": 0.27, + "grad_norm": 0.9100256992087814, + "learning_rate": 8.607308126570773e-06, + "loss": 0.4629, "step": 3751 }, { - "epoch": 0.39, - "grad_norm": 2.5548357809398037, - "learning_rate": 6.898252226077642e-06, - "loss": 0.6518, + "epoch": 0.27, + "grad_norm": 1.5416665482159027, + "learning_rate": 8.606512285557725e-06, + "loss": 0.5922, "step": 3752 }, { - "epoch": 0.39, - "grad_norm": 1.8928951894823056, - "learning_rate": 6.896675473956935e-06, - "loss": 0.6251, + "epoch": 0.27, + "grad_norm": 1.6206630681358223, + "learning_rate": 8.605716254036475e-06, + "loss": 0.5684, "step": 3753 }, { - "epoch": 0.4, - "grad_norm": 2.8686199304183004, - "learning_rate": 6.895098501485955e-06, - "loss": 0.6949, + "epoch": 0.27, + "grad_norm": 1.880549776972307, + "learning_rate": 8.604920032049075e-06, + "loss": 0.5451, "step": 3754 }, { - "epoch": 0.4, - "grad_norm": 2.4005163798318754, - "learning_rate": 6.8935213088479096e-06, - "loss": 0.6532, + "epoch": 0.27, + "grad_norm": 0.8477750250082801, + "learning_rate": 8.604123619637585e-06, + "loss": 0.4584, "step": 3755 }, { - "epoch": 0.4, - "grad_norm": 1.083088075895648, - "learning_rate": 6.891943896226031e-06, - "loss": 0.6361, + "epoch": 0.27, + "grad_norm": 2.6204696684290933, + "learning_rate": 8.603327016844073e-06, + "loss": 0.5388, "step": 3756 }, { - "epoch": 0.4, - "grad_norm": 2.3207947203497694, - "learning_rate": 6.890366263803579e-06, - "loss": 0.6775, + "epoch": 0.27, + "grad_norm": 1.494624616234771, + "learning_rate": 8.602530223710619e-06, + "loss": 0.6469, "step": 3757 }, { - "epoch": 0.4, - "grad_norm": 2.8029690208205515, - "learning_rate": 6.88878841176384e-06, - "loss": 0.614, + "epoch": 0.27, + "grad_norm": 1.7529979709601866, + "learning_rate": 8.601733240279308e-06, + "loss": 0.5307, "step": 3758 }, { - "epoch": 0.4, - "grad_norm": 1.913955180413663, - "learning_rate": 6.887210340290124e-06, - "loss": 0.6718, + "epoch": 0.27, + "grad_norm": 1.6376235693132724, + "learning_rate": 8.600936066592247e-06, + "loss": 0.5933, "step": 3759 }, { - "epoch": 0.4, - "grad_norm": 2.4336755807961237, - "learning_rate": 6.885632049565766e-06, - "loss": 0.6733, + "epoch": 0.27, + "grad_norm": 2.173461684596855, + "learning_rate": 8.600138702691538e-06, + "loss": 0.5793, "step": 3760 }, { - "epoch": 0.4, - "grad_norm": 3.0818816161132827, - "learning_rate": 6.884053539774125e-06, - "loss": 0.6907, + "epoch": 0.27, + "grad_norm": 6.089061327928108, + "learning_rate": 8.599341148619306e-06, + "loss": 0.593, "step": 3761 }, { - "epoch": 0.4, - "grad_norm": 4.6742613246704785, - "learning_rate": 6.882474811098592e-06, - "loss": 0.6829, + "epoch": 0.27, + "grad_norm": 1.8191289446129948, + "learning_rate": 8.598543404417678e-06, + "loss": 0.5995, "step": 3762 }, { - "epoch": 0.4, - "grad_norm": 1.9849851340007725, - "learning_rate": 6.880895863722576e-06, - "loss": 0.7172, + "epoch": 0.27, + "grad_norm": 3.1441988238525727, + "learning_rate": 8.597745470128794e-06, + "loss": 0.5621, "step": 3763 }, { - "epoch": 0.4, - "grad_norm": 3.0447314238097247, - "learning_rate": 6.8793166978295166e-06, - "loss": 0.631, + "epoch": 0.27, + "grad_norm": 1.8071413387992534, + "learning_rate": 8.596947345794801e-06, + "loss": 0.5233, "step": 3764 }, { - "epoch": 0.4, - "grad_norm": 3.91470175770067, - "learning_rate": 6.877737313602876e-06, - "loss": 0.6654, + "epoch": 0.27, + "grad_norm": 1.5440797659628793, + "learning_rate": 8.596149031457865e-06, + "loss": 0.531, "step": 3765 }, { - "epoch": 0.4, - "grad_norm": 2.1785559232413867, - "learning_rate": 6.8761577112261425e-06, - "loss": 0.6318, + "epoch": 0.27, + "grad_norm": 2.0068733081174424, + "learning_rate": 8.595350527160149e-06, + "loss": 0.5612, "step": 3766 }, { - "epoch": 0.4, - "grad_norm": 2.669250198673815, - "learning_rate": 6.874577890882829e-06, - "loss": 0.6375, + "epoch": 0.27, + "grad_norm": 1.8228956837951042, + "learning_rate": 8.594551832943837e-06, + "loss": 0.5501, "step": 3767 }, { - "epoch": 0.4, - "grad_norm": 2.9415389760168593, - "learning_rate": 6.872997852756474e-06, - "loss": 0.6013, + "epoch": 0.27, + "grad_norm": 2.4745031533624027, + "learning_rate": 8.593752948851115e-06, + "loss": 0.5942, "step": 3768 }, { - "epoch": 0.4, - "grad_norm": 2.4526688080708596, - "learning_rate": 6.871417597030644e-06, - "loss": 0.6684, + "epoch": 0.27, + "grad_norm": 2.4569583524023715, + "learning_rate": 8.592953874924187e-06, + "loss": 0.543, "step": 3769 }, { - "epoch": 0.4, - "grad_norm": 2.788823894332462, - "learning_rate": 6.869837123888926e-06, - "loss": 0.6253, + "epoch": 0.27, + "grad_norm": 2.0780232098348477, + "learning_rate": 8.59215461120526e-06, + "loss": 0.627, "step": 3770 }, { - "epoch": 0.4, - "grad_norm": 4.477851029463746, - "learning_rate": 6.868256433514938e-06, - "loss": 0.7383, + "epoch": 0.27, + "grad_norm": 1.485867233128486, + "learning_rate": 8.591355157736555e-06, + "loss": 0.6047, "step": 3771 }, { - "epoch": 0.4, - "grad_norm": 2.269796038141395, - "learning_rate": 6.866675526092317e-06, - "loss": 0.7487, + "epoch": 0.27, + "grad_norm": 1.78619071927407, + "learning_rate": 8.5905555145603e-06, + "loss": 0.5193, "step": 3772 }, { - "epoch": 0.4, - "grad_norm": 2.648425239153188, - "learning_rate": 6.86509440180473e-06, - "loss": 0.7038, + "epoch": 0.27, + "grad_norm": 1.7032473602452767, + "learning_rate": 8.589755681718737e-06, + "loss": 0.5146, "step": 3773 }, { - "epoch": 0.4, - "grad_norm": 2.375374473672006, - "learning_rate": 6.863513060835866e-06, - "loss": 0.6327, + "epoch": 0.27, + "grad_norm": 1.707653761119075, + "learning_rate": 8.588955659254117e-06, + "loss": 0.5946, "step": 3774 }, { - "epoch": 0.4, - "grad_norm": 1.1344879105982528, - "learning_rate": 6.861931503369441e-06, - "loss": 0.6503, + "epoch": 0.27, + "grad_norm": 1.6047857631036928, + "learning_rate": 8.588155447208695e-06, + "loss": 0.5698, "step": 3775 }, { - "epoch": 0.4, - "grad_norm": 3.840442440534222, - "learning_rate": 6.8603497295891975e-06, - "loss": 0.5909, + "epoch": 0.27, + "grad_norm": 2.9266184126070436, + "learning_rate": 8.587355045624746e-06, + "loss": 0.57, "step": 3776 }, { - "epoch": 0.4, - "grad_norm": 3.9504848651490403, - "learning_rate": 6.8587677396789e-06, - "loss": 0.6947, + "epoch": 0.27, + "grad_norm": 1.5711485289073448, + "learning_rate": 8.586554454544547e-06, + "loss": 0.4948, "step": 3777 }, { - "epoch": 0.4, - "grad_norm": 2.764177497995866, - "learning_rate": 6.8571855338223395e-06, - "loss": 0.6062, + "epoch": 0.27, + "grad_norm": 3.6648091776827965, + "learning_rate": 8.58575367401039e-06, + "loss": 0.512, "step": 3778 }, { - "epoch": 0.4, - "grad_norm": 3.440160369843259, - "learning_rate": 6.855603112203333e-06, - "loss": 0.6853, + "epoch": 0.27, + "grad_norm": 1.4927965316453047, + "learning_rate": 8.584952704064572e-06, + "loss": 0.5035, "step": 3779 }, { - "epoch": 0.4, - "grad_norm": 2.2362406157836525, - "learning_rate": 6.854020475005719e-06, - "loss": 0.6329, + "epoch": 0.27, + "grad_norm": 2.4557268915209147, + "learning_rate": 8.584151544749407e-06, + "loss": 0.5932, "step": 3780 }, { - "epoch": 0.4, - "grad_norm": 1.1361521836950608, - "learning_rate": 6.852437622413366e-06, - "loss": 0.5591, + "epoch": 0.27, + "grad_norm": 0.8295266162640058, + "learning_rate": 8.583350196107209e-06, + "loss": 0.4965, "step": 3781 }, { - "epoch": 0.4, - "grad_norm": 2.2958928068872213, - "learning_rate": 6.850854554610167e-06, - "loss": 0.7093, + "epoch": 0.27, + "grad_norm": 1.6084824433679665, + "learning_rate": 8.582548658180314e-06, + "loss": 0.5931, "step": 3782 }, { - "epoch": 0.4, - "grad_norm": 2.311428807633486, - "learning_rate": 6.849271271780034e-06, - "loss": 0.6284, + "epoch": 0.27, + "grad_norm": 1.9301530304262977, + "learning_rate": 8.581746931011058e-06, + "loss": 0.5536, "step": 3783 }, { - "epoch": 0.4, - "grad_norm": 2.8763588330667194, - "learning_rate": 6.847687774106911e-06, - "loss": 0.6576, + "epoch": 0.27, + "grad_norm": 2.3148343399984417, + "learning_rate": 8.580945014641795e-06, + "loss": 0.5532, "step": 3784 }, { - "epoch": 0.4, - "grad_norm": 2.382119861964838, - "learning_rate": 6.846104061774763e-06, - "loss": 0.6577, + "epoch": 0.27, + "grad_norm": 1.7396558761754162, + "learning_rate": 8.580142909114881e-06, + "loss": 0.652, "step": 3785 }, { - "epoch": 0.4, - "grad_norm": 2.0655614717842052, - "learning_rate": 6.8445201349675825e-06, - "loss": 0.6832, + "epoch": 0.27, + "grad_norm": 1.8800748154454312, + "learning_rate": 8.579340614472688e-06, + "loss": 0.5488, "step": 3786 }, { - "epoch": 0.4, - "grad_norm": 2.8485737316689894, - "learning_rate": 6.842935993869385e-06, - "loss": 0.6554, + "epoch": 0.27, + "grad_norm": 1.5390932382544662, + "learning_rate": 8.578538130757596e-06, + "loss": 0.501, "step": 3787 }, { - "epoch": 0.4, - "grad_norm": 2.6641033052353102, - "learning_rate": 6.841351638664211e-06, - "loss": 0.7066, + "epoch": 0.27, + "grad_norm": 2.4376948941532373, + "learning_rate": 8.577735458011994e-06, + "loss": 0.5726, "step": 3788 }, { - "epoch": 0.4, - "grad_norm": 2.636994092546543, - "learning_rate": 6.839767069536126e-06, - "loss": 0.5704, + "epoch": 0.27, + "grad_norm": 2.7578687527353734, + "learning_rate": 8.57693259627828e-06, + "loss": 0.5151, "step": 3789 }, { - "epoch": 0.4, - "grad_norm": 2.1059693268960524, - "learning_rate": 6.838182286669222e-06, - "loss": 0.6862, + "epoch": 0.27, + "grad_norm": 3.11395877438302, + "learning_rate": 8.576129545598867e-06, + "loss": 0.5914, "step": 3790 }, { - "epoch": 0.4, - "grad_norm": 2.389714565065946, - "learning_rate": 6.8365972902476115e-06, - "loss": 0.6674, + "epoch": 0.27, + "grad_norm": 1.7009293799988963, + "learning_rate": 8.575326306016173e-06, + "loss": 0.5136, "step": 3791 }, { - "epoch": 0.4, - "grad_norm": 2.3867920333404187, - "learning_rate": 6.835012080455439e-06, - "loss": 0.6521, + "epoch": 0.27, + "grad_norm": 2.566269272610906, + "learning_rate": 8.57452287757263e-06, + "loss": 0.5633, "step": 3792 }, { - "epoch": 0.4, - "grad_norm": 2.6159918503535615, - "learning_rate": 6.833426657476866e-06, - "loss": 0.6519, + "epoch": 0.27, + "grad_norm": 1.60928332987856, + "learning_rate": 8.573719260310676e-06, + "loss": 0.5138, "step": 3793 }, { - "epoch": 0.4, - "grad_norm": 2.226258850434491, - "learning_rate": 6.831841021496084e-06, - "loss": 0.7133, + "epoch": 0.27, + "grad_norm": 1.781189315892919, + "learning_rate": 8.572915454272762e-06, + "loss": 0.5968, "step": 3794 }, { - "epoch": 0.4, - "grad_norm": 2.899461642291162, - "learning_rate": 6.830255172697309e-06, - "loss": 0.6671, + "epoch": 0.27, + "grad_norm": 2.4190379140864304, + "learning_rate": 8.572111459501346e-06, + "loss": 0.5714, "step": 3795 }, { - "epoch": 0.4, - "grad_norm": 2.3305187723569447, - "learning_rate": 6.828669111264776e-06, - "loss": 0.6724, + "epoch": 0.27, + "grad_norm": 2.7533159365540176, + "learning_rate": 8.571307276038898e-06, + "loss": 0.4999, "step": 3796 }, { - "epoch": 0.4, - "grad_norm": 2.4055301005112946, - "learning_rate": 6.8270828373827536e-06, - "loss": 0.8054, + "epoch": 0.27, + "grad_norm": 0.7874564564287987, + "learning_rate": 8.5705029039279e-06, + "loss": 0.4434, "step": 3797 }, { - "epoch": 0.4, - "grad_norm": 2.2070086468105874, - "learning_rate": 6.825496351235528e-06, - "loss": 0.6663, + "epoch": 0.27, + "grad_norm": 2.1754908626180747, + "learning_rate": 8.56969834321084e-06, + "loss": 0.6336, "step": 3798 }, { - "epoch": 0.4, - "grad_norm": 2.465861609693202, - "learning_rate": 6.823909653007414e-06, - "loss": 0.6668, + "epoch": 0.27, + "grad_norm": 1.8820861037973569, + "learning_rate": 8.568893593930217e-06, + "loss": 0.541, "step": 3799 }, { - "epoch": 0.4, - "grad_norm": 2.7446295867469264, - "learning_rate": 6.8223227428827485e-06, - "loss": 0.6663, + "epoch": 0.27, + "grad_norm": 1.6406993139989712, + "learning_rate": 8.568088656128542e-06, + "loss": 0.571, "step": 3800 }, { - "epoch": 0.4, - "grad_norm": 2.2768346355361744, - "learning_rate": 6.820735621045895e-06, - "loss": 0.6675, + "epoch": 0.27, + "grad_norm": 1.8096483607658562, + "learning_rate": 8.567283529848333e-06, + "loss": 0.5067, "step": 3801 }, { - "epoch": 0.4, - "grad_norm": 2.3820614356193843, - "learning_rate": 6.81914828768124e-06, - "loss": 0.689, + "epoch": 0.27, + "grad_norm": 1.571279842775095, + "learning_rate": 8.566478215132121e-06, + "loss": 0.5589, "step": 3802 }, { - "epoch": 0.4, - "grad_norm": 2.993505071374713, - "learning_rate": 6.817560742973196e-06, - "loss": 0.7702, + "epoch": 0.27, + "grad_norm": 1.7181970334415946, + "learning_rate": 8.565672712022443e-06, + "loss": 0.5143, "step": 3803 }, { - "epoch": 0.4, - "grad_norm": 2.25574301607733, - "learning_rate": 6.8159729871061984e-06, - "loss": 0.6731, + "epoch": 0.27, + "grad_norm": 1.5794291033700176, + "learning_rate": 8.564867020561853e-06, + "loss": 0.614, "step": 3804 }, { - "epoch": 0.4, - "grad_norm": 3.880274384584933, - "learning_rate": 6.814385020264708e-06, - "loss": 0.7064, + "epoch": 0.27, + "grad_norm": 1.6888873843226395, + "learning_rate": 8.564061140792905e-06, + "loss": 0.5909, "step": 3805 }, { - "epoch": 0.4, - "grad_norm": 2.6969745237833136, - "learning_rate": 6.812796842633213e-06, - "loss": 0.7117, + "epoch": 0.27, + "grad_norm": 1.8383682005543078, + "learning_rate": 8.563255072758174e-06, + "loss": 0.619, "step": 3806 }, { - "epoch": 0.4, - "grad_norm": 2.10614106669541, - "learning_rate": 6.811208454396218e-06, - "loss": 0.7491, + "epoch": 0.27, + "grad_norm": 1.619699238933111, + "learning_rate": 8.562448816500234e-06, + "loss": 0.5617, "step": 3807 }, { - "epoch": 0.4, - "grad_norm": 4.469718689532213, - "learning_rate": 6.809619855738262e-06, - "loss": 0.6633, + "epoch": 0.27, + "grad_norm": 1.832593864032716, + "learning_rate": 8.561642372061676e-06, + "loss": 0.5444, "step": 3808 }, { - "epoch": 0.4, - "grad_norm": 2.6641844592932244, - "learning_rate": 6.8080310468439015e-06, - "loss": 0.6784, + "epoch": 0.27, + "grad_norm": 1.5581582719820313, + "learning_rate": 8.560835739485101e-06, + "loss": 0.6104, "step": 3809 }, { - "epoch": 0.4, - "grad_norm": 6.960823525262656, - "learning_rate": 6.806442027897722e-06, - "loss": 0.6114, + "epoch": 0.27, + "grad_norm": 1.778132074344884, + "learning_rate": 8.560028918813115e-06, + "loss": 0.5947, "step": 3810 }, { - "epoch": 0.4, - "grad_norm": 1.963680773700924, - "learning_rate": 6.804852799084329e-06, - "loss": 0.6952, + "epoch": 0.27, + "grad_norm": 0.9323600435113633, + "learning_rate": 8.559221910088338e-06, + "loss": 0.4877, "step": 3811 }, { - "epoch": 0.4, - "grad_norm": 2.327360252561889, - "learning_rate": 6.803263360588355e-06, - "loss": 0.6871, + "epoch": 0.27, + "grad_norm": 1.851837495849415, + "learning_rate": 8.558414713353402e-06, + "loss": 0.5353, "step": 3812 }, { - "epoch": 0.4, - "grad_norm": 2.6946911855419358, - "learning_rate": 6.801673712594456e-06, - "loss": 0.7192, + "epoch": 0.27, + "grad_norm": 1.636943974668802, + "learning_rate": 8.55760732865094e-06, + "loss": 0.541, "step": 3813 }, { - "epoch": 0.4, - "grad_norm": 2.7849793322013197, - "learning_rate": 6.8000838552873135e-06, - "loss": 0.6403, + "epoch": 0.27, + "grad_norm": 1.7723075791422664, + "learning_rate": 8.556799756023607e-06, + "loss": 0.5618, "step": 3814 }, { - "epoch": 0.4, - "grad_norm": 2.3535336364449804, - "learning_rate": 6.7984937888516325e-06, - "loss": 0.708, + "epoch": 0.27, + "grad_norm": 0.7992390308027445, + "learning_rate": 8.555991995514056e-06, + "loss": 0.459, "step": 3815 }, { - "epoch": 0.4, - "grad_norm": 4.483684306418722, - "learning_rate": 6.796903513472142e-06, - "loss": 0.6605, + "epoch": 0.27, + "grad_norm": 1.5088602109566804, + "learning_rate": 8.555184047164961e-06, + "loss": 0.503, "step": 3816 }, { - "epoch": 0.4, - "grad_norm": 3.157785406165776, - "learning_rate": 6.795313029333596e-06, - "loss": 0.576, + "epoch": 0.27, + "grad_norm": 1.7718530971660045, + "learning_rate": 8.554375911018997e-06, + "loss": 0.5972, "step": 3817 }, { - "epoch": 0.4, - "grad_norm": 2.243032193761467, - "learning_rate": 6.79372233662077e-06, - "loss": 0.5717, + "epoch": 0.27, + "grad_norm": 0.7862736833270481, + "learning_rate": 8.553567587118853e-06, + "loss": 0.451, "step": 3818 }, { - "epoch": 0.4, - "grad_norm": 3.0101289906232367, - "learning_rate": 6.79213143551847e-06, - "loss": 0.5542, + "epoch": 0.27, + "grad_norm": 3.36103182655997, + "learning_rate": 8.552759075507229e-06, + "loss": 0.5339, "step": 3819 }, { - "epoch": 0.4, - "grad_norm": 3.8401966485891643, - "learning_rate": 6.790540326211519e-06, - "loss": 0.6712, + "epoch": 0.27, + "grad_norm": 1.7839866680797638, + "learning_rate": 8.551950376226831e-06, + "loss": 0.5399, "step": 3820 }, { - "epoch": 0.4, - "grad_norm": 2.757074235173517, - "learning_rate": 6.78894900888477e-06, - "loss": 0.6846, + "epoch": 0.27, + "grad_norm": 1.7623223127474545, + "learning_rate": 8.55114148932038e-06, + "loss": 0.5581, "step": 3821 }, { - "epoch": 0.4, - "grad_norm": 4.674263473701696, - "learning_rate": 6.787357483723096e-06, - "loss": 0.7419, + "epoch": 0.27, + "grad_norm": 2.8185050734153316, + "learning_rate": 8.550332414830602e-06, + "loss": 0.6203, "step": 3822 }, { - "epoch": 0.4, - "grad_norm": 2.606110641324874, - "learning_rate": 6.785765750911396e-06, - "loss": 0.6317, + "epoch": 0.27, + "grad_norm": 1.6406746671890167, + "learning_rate": 8.549523152800235e-06, + "loss": 0.522, "step": 3823 }, { - "epoch": 0.4, - "grad_norm": 2.1675322487659723, - "learning_rate": 6.7841738106345935e-06, - "loss": 0.6373, + "epoch": 0.27, + "grad_norm": 1.6488788624415496, + "learning_rate": 8.548713703272029e-06, + "loss": 0.5502, "step": 3824 }, { - "epoch": 0.4, - "grad_norm": 2.2312923273279153, - "learning_rate": 6.782581663077637e-06, - "loss": 0.6132, + "epoch": 0.27, + "grad_norm": 1.7197370923548907, + "learning_rate": 8.547904066288738e-06, + "loss": 0.5425, "step": 3825 }, { - "epoch": 0.4, - "grad_norm": 2.6581842644039164, - "learning_rate": 6.780989308425493e-06, - "loss": 0.6928, + "epoch": 0.27, + "grad_norm": 1.7148865951696295, + "learning_rate": 8.547094241893135e-06, + "loss": 0.5251, "step": 3826 }, { - "epoch": 0.4, - "grad_norm": 2.246504818054555, - "learning_rate": 6.77939674686316e-06, - "loss": 0.6127, + "epoch": 0.27, + "grad_norm": 1.8466129668566087, + "learning_rate": 8.546284230127994e-06, + "loss": 0.5415, "step": 3827 }, { - "epoch": 0.4, - "grad_norm": 2.7698626969798905, - "learning_rate": 6.7778039785756575e-06, - "loss": 0.6525, + "epoch": 0.27, + "grad_norm": 0.8178112851284088, + "learning_rate": 8.545474031036102e-06, + "loss": 0.4582, "step": 3828 }, { - "epoch": 0.4, - "grad_norm": 2.4450311084573038, - "learning_rate": 6.7762110037480265e-06, - "loss": 0.6343, + "epoch": 0.27, + "grad_norm": 1.9323978237351618, + "learning_rate": 8.544663644660261e-06, + "loss": 0.5638, "step": 3829 }, { - "epoch": 0.4, - "grad_norm": 4.123523847157009, - "learning_rate": 6.774617822565338e-06, - "loss": 0.6966, + "epoch": 0.27, + "grad_norm": 1.884021669557772, + "learning_rate": 8.543853071043273e-06, + "loss": 0.59, "step": 3830 }, { - "epoch": 0.4, - "grad_norm": 2.7410973711990247, - "learning_rate": 6.773024435212678e-06, - "loss": 0.6834, + "epoch": 0.27, + "grad_norm": 0.9114240533091512, + "learning_rate": 8.54304231022796e-06, + "loss": 0.4735, "step": 3831 }, { - "epoch": 0.4, - "grad_norm": 10.729971927019776, - "learning_rate": 6.771430841875166e-06, - "loss": 0.7335, + "epoch": 0.27, + "grad_norm": 1.7679818359024413, + "learning_rate": 8.542231362257144e-06, + "loss": 0.5297, "step": 3832 }, { - "epoch": 0.4, - "grad_norm": 2.461919317112508, - "learning_rate": 6.7698370427379405e-06, - "loss": 0.7462, + "epoch": 0.27, + "grad_norm": 2.1708690414638694, + "learning_rate": 8.541420227173666e-06, + "loss": 0.552, "step": 3833 }, { - "epoch": 0.4, - "grad_norm": 2.764361831143325, - "learning_rate": 6.7682430379861615e-06, - "loss": 0.6316, + "epoch": 0.27, + "grad_norm": 2.2438519295705546, + "learning_rate": 8.540608905020372e-06, + "loss": 0.5875, "step": 3834 }, { - "epoch": 0.4, - "grad_norm": 2.09261926247929, - "learning_rate": 6.766648827805019e-06, - "loss": 0.6006, + "epoch": 0.27, + "grad_norm": 2.213655710984006, + "learning_rate": 8.539797395840117e-06, + "loss": 0.587, "step": 3835 }, { - "epoch": 0.4, - "grad_norm": 3.804234407107111, - "learning_rate": 6.765054412379722e-06, - "loss": 0.6852, + "epoch": 0.27, + "grad_norm": 1.8008708799469018, + "learning_rate": 8.53898569967577e-06, + "loss": 0.5484, "step": 3836 }, { - "epoch": 0.4, - "grad_norm": 2.798217238649048, - "learning_rate": 6.763459791895506e-06, - "loss": 0.6397, + "epoch": 0.27, + "grad_norm": 2.0264993405539036, + "learning_rate": 8.538173816570205e-06, + "loss": 0.5089, "step": 3837 }, { - "epoch": 0.4, - "grad_norm": 2.6552060440599377, - "learning_rate": 6.761864966537629e-06, - "loss": 0.5773, + "epoch": 0.27, + "grad_norm": 2.8067602109380463, + "learning_rate": 8.537361746566312e-06, + "loss": 0.5395, "step": 3838 }, { - "epoch": 0.4, - "grad_norm": 2.0551205343106513, - "learning_rate": 6.760269936491373e-06, - "loss": 0.7127, + "epoch": 0.27, + "grad_norm": 1.4753260055086794, + "learning_rate": 8.536549489706985e-06, + "loss": 0.5459, "step": 3839 }, { - "epoch": 0.4, - "grad_norm": 2.2555093947538323, - "learning_rate": 6.7586747019420444e-06, - "loss": 0.6463, + "epoch": 0.27, + "grad_norm": 1.8257957200420598, + "learning_rate": 8.535737046035131e-06, + "loss": 0.5921, "step": 3840 }, { - "epoch": 0.4, - "grad_norm": 2.508518018543992, - "learning_rate": 6.7570792630749725e-06, - "loss": 0.6151, + "epoch": 0.27, + "grad_norm": 1.7109200196763055, + "learning_rate": 8.534924415593664e-06, + "loss": 0.5327, "step": 3841 }, { - "epoch": 0.4, - "grad_norm": 2.2115480509362024, - "learning_rate": 6.755483620075509e-06, - "loss": 0.6153, + "epoch": 0.27, + "grad_norm": 1.3834273373720936, + "learning_rate": 8.534111598425511e-06, + "loss": 0.4793, "step": 3842 }, { - "epoch": 0.4, - "grad_norm": 2.5571778252409416, - "learning_rate": 6.753887773129036e-06, - "loss": 0.6822, + "epoch": 0.27, + "grad_norm": 1.7605628394301451, + "learning_rate": 8.533298594573611e-06, + "loss": 0.57, "step": 3843 }, { - "epoch": 0.4, - "grad_norm": 3.3827224492687704, - "learning_rate": 6.752291722420951e-06, - "loss": 0.6651, + "epoch": 0.27, + "grad_norm": 2.1854642283785393, + "learning_rate": 8.532485404080905e-06, + "loss": 0.5814, "step": 3844 }, { - "epoch": 0.4, - "grad_norm": 5.068263890882636, - "learning_rate": 6.750695468136679e-06, - "loss": 0.7441, + "epoch": 0.27, + "grad_norm": 1.852546639449303, + "learning_rate": 8.53167202699035e-06, + "loss": 0.5651, "step": 3845 }, { - "epoch": 0.4, - "grad_norm": 2.599488472042048, - "learning_rate": 6.7490990104616684e-06, - "loss": 0.6053, + "epoch": 0.27, + "grad_norm": 1.788869111915241, + "learning_rate": 8.530858463344913e-06, + "loss": 0.6052, "step": 3846 }, { - "epoch": 0.4, - "grad_norm": 5.068319078405129, - "learning_rate": 6.74750234958139e-06, - "loss": 0.6563, + "epoch": 0.27, + "grad_norm": 1.8451966123887178, + "learning_rate": 8.530044713187566e-06, + "loss": 0.5237, "step": 3847 }, { - "epoch": 0.4, - "grad_norm": 3.4661100791938293, - "learning_rate": 6.745905485681341e-06, - "loss": 0.605, + "epoch": 0.27, + "grad_norm": 1.8552291128348275, + "learning_rate": 8.529230776561297e-06, + "loss": 0.5284, "step": 3848 }, { - "epoch": 0.41, - "grad_norm": 2.8783604735605857, - "learning_rate": 6.7443084189470385e-06, - "loss": 0.6415, + "epoch": 0.27, + "grad_norm": 2.0832717237403875, + "learning_rate": 8.5284166535091e-06, + "loss": 0.6644, "step": 3849 }, { - "epoch": 0.41, - "grad_norm": 4.159785400047253, - "learning_rate": 6.742711149564028e-06, - "loss": 0.7075, + "epoch": 0.27, + "grad_norm": 1.5108183483882374, + "learning_rate": 8.527602344073979e-06, + "loss": 0.5007, "step": 3850 }, { - "epoch": 0.41, - "grad_norm": 2.5896401663997524, - "learning_rate": 6.741113677717872e-06, - "loss": 0.7611, + "epoch": 0.27, + "grad_norm": 1.7247616673776254, + "learning_rate": 8.526787848298947e-06, + "loss": 0.5446, "step": 3851 }, { - "epoch": 0.41, - "grad_norm": 3.423614667213638, - "learning_rate": 6.7395160035941624e-06, - "loss": 0.7156, + "epoch": 0.27, + "grad_norm": 1.6720954318137098, + "learning_rate": 8.525973166227033e-06, + "loss": 0.5414, "step": 3852 }, { - "epoch": 0.41, - "grad_norm": 2.874352439060714, - "learning_rate": 6.737918127378511e-06, - "loss": 0.5727, + "epoch": 0.27, + "grad_norm": 1.8452923596805926, + "learning_rate": 8.525158297901268e-06, + "loss": 0.563, "step": 3853 }, { - "epoch": 0.41, - "grad_norm": 2.2310646192841106, - "learning_rate": 6.736320049256557e-06, - "loss": 0.6234, + "epoch": 0.27, + "grad_norm": 1.4553929112887263, + "learning_rate": 8.524343243364697e-06, + "loss": 0.5259, "step": 3854 }, { - "epoch": 0.41, - "grad_norm": 3.078116761718466, - "learning_rate": 6.734721769413959e-06, - "loss": 0.6838, + "epoch": 0.27, + "grad_norm": 1.591795924587381, + "learning_rate": 8.523528002660374e-06, + "loss": 0.5442, "step": 3855 }, { - "epoch": 0.41, - "grad_norm": 3.047167191192513, - "learning_rate": 6.733123288036399e-06, - "loss": 0.739, + "epoch": 0.27, + "grad_norm": 1.8604114879662283, + "learning_rate": 8.522712575831363e-06, + "loss": 0.5204, "step": 3856 }, { - "epoch": 0.41, - "grad_norm": 2.98595396084378, - "learning_rate": 6.731524605309587e-06, - "loss": 0.6049, + "epoch": 0.27, + "grad_norm": 1.5923104445159237, + "learning_rate": 8.521896962920735e-06, + "loss": 0.5193, "step": 3857 }, { - "epoch": 0.41, - "grad_norm": 2.152132778242366, - "learning_rate": 6.72992572141925e-06, - "loss": 0.6779, + "epoch": 0.27, + "grad_norm": 1.5970865677416415, + "learning_rate": 8.521081163971579e-06, + "loss": 0.561, "step": 3858 }, { - "epoch": 0.41, - "grad_norm": 3.411218764669821, - "learning_rate": 6.728326636551145e-06, - "loss": 0.6093, + "epoch": 0.27, + "grad_norm": 3.0666850625361888, + "learning_rate": 8.520265179026981e-06, + "loss": 0.5682, "step": 3859 }, { - "epoch": 0.41, - "grad_norm": 2.1157066121453587, - "learning_rate": 6.726727350891047e-06, - "loss": 0.6165, + "epoch": 0.27, + "grad_norm": 1.7932012844764185, + "learning_rate": 8.51944900813005e-06, + "loss": 0.5419, "step": 3860 }, { - "epoch": 0.41, - "grad_norm": 2.068433705431225, - "learning_rate": 6.725127864624757e-06, - "loss": 0.5947, + "epoch": 0.27, + "grad_norm": 1.8680871795908036, + "learning_rate": 8.518632651323897e-06, + "loss": 0.5482, "step": 3861 }, { - "epoch": 0.41, - "grad_norm": 2.8708815594354373, - "learning_rate": 6.723528177938097e-06, - "loss": 0.6354, + "epoch": 0.27, + "grad_norm": 1.760047374024755, + "learning_rate": 8.517816108651644e-06, + "loss": 0.6172, "step": 3862 }, { - "epoch": 0.41, - "grad_norm": 3.33467858012365, - "learning_rate": 6.721928291016917e-06, - "loss": 0.7318, + "epoch": 0.27, + "grad_norm": 1.8698498765309115, + "learning_rate": 8.516999380156426e-06, + "loss": 0.6412, "step": 3863 }, { - "epoch": 0.41, - "grad_norm": 2.803975371742299, - "learning_rate": 6.720328204047085e-06, - "loss": 0.6814, + "epoch": 0.27, + "grad_norm": 1.6892544366605822, + "learning_rate": 8.516182465881381e-06, + "loss": 0.583, "step": 3864 }, { - "epoch": 0.41, - "grad_norm": 1.1642857039266927, - "learning_rate": 6.718727917214496e-06, - "loss": 0.6067, + "epoch": 0.27, + "grad_norm": 1.7038972228868394, + "learning_rate": 8.515365365869665e-06, + "loss": 0.6086, "step": 3865 }, { - "epoch": 0.41, - "grad_norm": 2.80771219731655, - "learning_rate": 6.7171274307050645e-06, - "loss": 0.6535, + "epoch": 0.27, + "grad_norm": 0.8723614717023433, + "learning_rate": 8.514548080164437e-06, + "loss": 0.4765, "step": 3866 }, { - "epoch": 0.41, - "grad_norm": 2.0379961191421874, - "learning_rate": 6.715526744704732e-06, - "loss": 0.6385, + "epoch": 0.27, + "grad_norm": 1.5350372353366104, + "learning_rate": 8.513730608808873e-06, + "loss": 0.5429, "step": 3867 }, { - "epoch": 0.41, - "grad_norm": 2.2538626104452626, - "learning_rate": 6.7139258593994625e-06, - "loss": 0.6347, + "epoch": 0.27, + "grad_norm": 1.5025155623128448, + "learning_rate": 8.51291295184615e-06, + "loss": 0.574, "step": 3868 }, { - "epoch": 0.41, - "grad_norm": 2.5560569234327355, - "learning_rate": 6.712324774975241e-06, - "loss": 0.6909, + "epoch": 0.27, + "grad_norm": 1.6465849786177045, + "learning_rate": 8.512095109319464e-06, + "loss": 0.5832, "step": 3869 }, { - "epoch": 0.41, - "grad_norm": 2.4724982923209953, - "learning_rate": 6.710723491618077e-06, - "loss": 0.676, + "epoch": 0.27, + "grad_norm": 1.845589811718301, + "learning_rate": 8.511277081272011e-06, + "loss": 0.5945, "step": 3870 }, { - "epoch": 0.41, - "grad_norm": 2.692509799497406, - "learning_rate": 6.709122009514003e-06, - "loss": 0.673, + "epoch": 0.27, + "grad_norm": 1.8835233713200703, + "learning_rate": 8.510458867747007e-06, + "loss": 0.558, "step": 3871 }, { - "epoch": 0.41, - "grad_norm": 2.2919297459345502, - "learning_rate": 6.707520328849074e-06, - "loss": 0.6929, + "epoch": 0.27, + "grad_norm": 1.946078681308966, + "learning_rate": 8.509640468787669e-06, + "loss": 0.5718, "step": 3872 }, { - "epoch": 0.41, - "grad_norm": 2.882493860740183, - "learning_rate": 6.7059184498093696e-06, - "loss": 0.6791, + "epoch": 0.27, + "grad_norm": 1.8744379883570677, + "learning_rate": 8.508821884437229e-06, + "loss": 0.5953, "step": 3873 }, { - "epoch": 0.41, - "grad_norm": 3.6357067311902735, - "learning_rate": 6.70431637258099e-06, - "loss": 0.7293, + "epoch": 0.27, + "grad_norm": 1.9762076886863043, + "learning_rate": 8.50800311473893e-06, + "loss": 0.5926, "step": 3874 }, { - "epoch": 0.41, - "grad_norm": 8.322268264406318, - "learning_rate": 6.702714097350063e-06, - "loss": 0.6628, + "epoch": 0.27, + "grad_norm": 1.5064085921660668, + "learning_rate": 8.507184159736018e-06, + "loss": 0.5242, "step": 3875 }, { - "epoch": 0.41, - "grad_norm": 2.4783117896301388, - "learning_rate": 6.701111624302732e-06, - "loss": 0.6576, + "epoch": 0.28, + "grad_norm": 1.8058809871413941, + "learning_rate": 8.506365019471754e-06, + "loss": 0.5793, "step": 3876 }, { - "epoch": 0.41, - "grad_norm": 2.5768699541037674, - "learning_rate": 6.69950895362517e-06, - "loss": 0.7493, + "epoch": 0.28, + "grad_norm": 1.6473431769773348, + "learning_rate": 8.505545693989408e-06, + "loss": 0.5097, "step": 3877 }, { - "epoch": 0.41, - "grad_norm": 5.682147247687123, - "learning_rate": 6.697906085503572e-06, - "loss": 0.6796, + "epoch": 0.28, + "grad_norm": 1.9477424320572152, + "learning_rate": 8.504726183332261e-06, + "loss": 0.6623, "step": 3878 }, { - "epoch": 0.41, - "grad_norm": 2.2802199978055913, - "learning_rate": 6.696303020124152e-06, - "loss": 0.6168, + "epoch": 0.28, + "grad_norm": 1.6724791826762484, + "learning_rate": 8.503906487543602e-06, + "loss": 0.5732, "step": 3879 }, { - "epoch": 0.41, - "grad_norm": 2.264182484401708, - "learning_rate": 6.694699757673151e-06, - "loss": 0.6551, + "epoch": 0.28, + "grad_norm": 1.4571240598881876, + "learning_rate": 8.503086606666728e-06, + "loss": 0.5174, "step": 3880 }, { - "epoch": 0.41, - "grad_norm": 2.5897290859559083, - "learning_rate": 6.693096298336832e-06, - "loss": 0.6541, + "epoch": 0.28, + "grad_norm": 1.52522568813604, + "learning_rate": 8.502266540744951e-06, + "loss": 0.5481, "step": 3881 }, { - "epoch": 0.41, - "grad_norm": 2.05265338675788, - "learning_rate": 6.691492642301478e-06, - "loss": 0.6983, + "epoch": 0.28, + "grad_norm": 0.9011082182386291, + "learning_rate": 8.501446289821588e-06, + "loss": 0.4806, "step": 3882 }, { - "epoch": 0.41, - "grad_norm": 2.55619801462537, - "learning_rate": 6.6898887897533985e-06, - "loss": 0.7378, + "epoch": 0.28, + "grad_norm": 0.8111808156287608, + "learning_rate": 8.500625853939965e-06, + "loss": 0.471, "step": 3883 }, { - "epoch": 0.41, - "grad_norm": 3.7122207089430885, - "learning_rate": 6.6882847408789255e-06, - "loss": 0.6772, + "epoch": 0.28, + "grad_norm": 1.511034379385225, + "learning_rate": 8.499805233143425e-06, + "loss": 0.5526, "step": 3884 }, { - "epoch": 0.41, - "grad_norm": 2.8904289019733236, - "learning_rate": 6.686680495864411e-06, - "loss": 0.6001, + "epoch": 0.28, + "grad_norm": 1.7561610770375933, + "learning_rate": 8.498984427475315e-06, + "loss": 0.5206, "step": 3885 }, { - "epoch": 0.41, - "grad_norm": 2.1642154485237306, - "learning_rate": 6.6850760548962315e-06, - "loss": 0.7032, + "epoch": 0.28, + "grad_norm": 1.626637988146729, + "learning_rate": 8.498163436978988e-06, + "loss": 0.6187, "step": 3886 }, { - "epoch": 0.41, - "grad_norm": 9.085180115876389, - "learning_rate": 6.683471418160787e-06, - "loss": 0.6206, + "epoch": 0.28, + "grad_norm": 1.445111640340236, + "learning_rate": 8.497342261697816e-06, + "loss": 0.5252, "step": 3887 }, { - "epoch": 0.41, - "grad_norm": 2.33182419264645, - "learning_rate": 6.681866585844501e-06, - "loss": 0.5902, + "epoch": 0.28, + "grad_norm": 2.3743187200672953, + "learning_rate": 8.496520901675175e-06, + "loss": 0.5749, "step": 3888 }, { - "epoch": 0.41, - "grad_norm": 2.158864307849004, - "learning_rate": 6.680261558133816e-06, - "loss": 0.5937, + "epoch": 0.28, + "grad_norm": 1.5949859922185785, + "learning_rate": 8.495699356954454e-06, + "loss": 0.6175, "step": 3889 }, { - "epoch": 0.41, - "grad_norm": 2.7965197556825463, - "learning_rate": 6.678656335215199e-06, - "loss": 0.6601, + "epoch": 0.28, + "grad_norm": 1.5550694736753177, + "learning_rate": 8.494877627579046e-06, + "loss": 0.4961, "step": 3890 }, { - "epoch": 0.41, - "grad_norm": 2.8204118744480993, - "learning_rate": 6.677050917275143e-06, - "loss": 0.6472, + "epoch": 0.28, + "grad_norm": 1.717102428138178, + "learning_rate": 8.49405571359236e-06, + "loss": 0.5395, "step": 3891 }, { - "epoch": 0.41, - "grad_norm": 3.4806015074983594, - "learning_rate": 6.675445304500159e-06, - "loss": 0.6548, + "epoch": 0.28, + "grad_norm": 1.7229315709351922, + "learning_rate": 8.493233615037813e-06, + "loss": 0.5364, "step": 3892 }, { - "epoch": 0.41, - "grad_norm": 1.1229091161769098, - "learning_rate": 6.673839497076783e-06, - "loss": 0.5833, + "epoch": 0.28, + "grad_norm": 1.7767331165377906, + "learning_rate": 8.492411331958828e-06, + "loss": 0.552, "step": 3893 }, { - "epoch": 0.41, - "grad_norm": 3.1338450581533546, - "learning_rate": 6.672233495191572e-06, - "loss": 0.6849, + "epoch": 0.28, + "grad_norm": 2.296947548331043, + "learning_rate": 8.491588864398843e-06, + "loss": 0.5626, "step": 3894 }, { - "epoch": 0.41, - "grad_norm": 2.3209497422936622, - "learning_rate": 6.670627299031109e-06, - "loss": 0.7225, + "epoch": 0.28, + "grad_norm": 1.518250241384418, + "learning_rate": 8.490766212401303e-06, + "loss": 0.4887, "step": 3895 }, { - "epoch": 0.41, - "grad_norm": 2.76307749364276, - "learning_rate": 6.669020908781994e-06, - "loss": 0.6026, + "epoch": 0.28, + "grad_norm": 1.7248719971845967, + "learning_rate": 8.489943376009661e-06, + "loss": 0.5765, "step": 3896 }, { - "epoch": 0.41, - "grad_norm": 2.4047612077616933, - "learning_rate": 6.667414324630856e-06, - "loss": 0.6428, + "epoch": 0.28, + "grad_norm": 1.6281892075346627, + "learning_rate": 8.489120355267388e-06, + "loss": 0.6011, "step": 3897 }, { - "epoch": 0.41, - "grad_norm": 8.962203572754492, - "learning_rate": 6.665807546764341e-06, - "loss": 0.6385, + "epoch": 0.28, + "grad_norm": 1.9997774144640543, + "learning_rate": 8.488297150217954e-06, + "loss": 0.5652, "step": 3898 }, { - "epoch": 0.41, - "grad_norm": 2.3629283595989983, - "learning_rate": 6.664200575369121e-06, - "loss": 0.6119, + "epoch": 0.28, + "grad_norm": 1.5699654832187757, + "learning_rate": 8.487473760904846e-06, + "loss": 0.5349, "step": 3899 }, { - "epoch": 0.41, - "grad_norm": 3.1049318573672307, - "learning_rate": 6.662593410631888e-06, - "loss": 0.6005, + "epoch": 0.28, + "grad_norm": 1.877927373492826, + "learning_rate": 8.486650187371553e-06, + "loss": 0.5748, "step": 3900 }, { - "epoch": 0.41, - "grad_norm": 2.1219922103429094, - "learning_rate": 6.660986052739357e-06, - "loss": 0.6908, + "epoch": 0.28, + "grad_norm": 1.7253889097844912, + "learning_rate": 8.485826429661586e-06, + "loss": 0.623, "step": 3901 }, { - "epoch": 0.41, - "grad_norm": 5.5048628027957625, - "learning_rate": 6.659378501878271e-06, - "loss": 0.5462, + "epoch": 0.28, + "grad_norm": 1.5622490818407206, + "learning_rate": 8.485002487818454e-06, + "loss": 0.5204, "step": 3902 }, { - "epoch": 0.41, - "grad_norm": 2.081524730634284, - "learning_rate": 6.657770758235387e-06, - "loss": 0.6065, + "epoch": 0.28, + "grad_norm": 1.7386041839141897, + "learning_rate": 8.484178361885683e-06, + "loss": 0.6397, "step": 3903 }, { - "epoch": 0.41, - "grad_norm": 2.286231378737835, - "learning_rate": 6.656162821997487e-06, - "loss": 0.6641, + "epoch": 0.28, + "grad_norm": 2.74408724570797, + "learning_rate": 8.483354051906802e-06, + "loss": 0.533, "step": 3904 }, { - "epoch": 0.41, - "grad_norm": 3.018088065090707, - "learning_rate": 6.654554693351379e-06, - "loss": 0.5698, + "epoch": 0.28, + "grad_norm": 1.7955297498248448, + "learning_rate": 8.48252955792536e-06, + "loss": 0.559, "step": 3905 }, { - "epoch": 0.41, - "grad_norm": 2.0133508154340487, - "learning_rate": 6.652946372483889e-06, - "loss": 0.6064, + "epoch": 0.28, + "grad_norm": 1.6497575387211016, + "learning_rate": 8.481704879984904e-06, + "loss": 0.5891, "step": 3906 }, { - "epoch": 0.41, - "grad_norm": 2.681912809689596, - "learning_rate": 6.651337859581868e-06, - "loss": 0.6913, + "epoch": 0.28, + "grad_norm": 2.912391218739193, + "learning_rate": 8.480880018129001e-06, + "loss": 0.6263, "step": 3907 }, { - "epoch": 0.41, - "grad_norm": 3.0310073690024075, - "learning_rate": 6.6497291548321876e-06, - "loss": 0.6523, + "epoch": 0.28, + "grad_norm": 1.7998160165888166, + "learning_rate": 8.48005497240122e-06, + "loss": 0.5699, "step": 3908 }, { - "epoch": 0.41, - "grad_norm": 2.3565442897512345, - "learning_rate": 6.648120258421744e-06, - "loss": 0.6395, + "epoch": 0.28, + "grad_norm": 1.5270859332299838, + "learning_rate": 8.479229742845141e-06, + "loss": 0.548, "step": 3909 }, { - "epoch": 0.41, - "grad_norm": 2.52692979561079, - "learning_rate": 6.646511170537452e-06, - "loss": 0.6792, + "epoch": 0.28, + "grad_norm": 1.4620700207737807, + "learning_rate": 8.478404329504359e-06, + "loss": 0.535, "step": 3910 }, { - "epoch": 0.41, - "grad_norm": 1.21140083501598, - "learning_rate": 6.644901891366251e-06, - "loss": 0.5643, + "epoch": 0.28, + "grad_norm": 1.5574994813497776, + "learning_rate": 8.477578732422474e-06, + "loss": 0.5261, "step": 3911 }, { - "epoch": 0.41, - "grad_norm": 1.0497136827512572, - "learning_rate": 6.643292421095105e-06, - "loss": 0.5934, + "epoch": 0.28, + "grad_norm": 1.7367374415785726, + "learning_rate": 8.476752951643099e-06, + "loss": 0.5373, "step": 3912 }, { - "epoch": 0.41, - "grad_norm": 3.4574095439368064, - "learning_rate": 6.641682759910993e-06, - "loss": 0.6165, + "epoch": 0.28, + "grad_norm": 1.007821685652148, + "learning_rate": 8.47592698720985e-06, + "loss": 0.4709, "step": 3913 }, { - "epoch": 0.41, - "grad_norm": 3.1072803681357684, - "learning_rate": 6.640072908000926e-06, - "loss": 0.7101, + "epoch": 0.28, + "grad_norm": 1.494088857985598, + "learning_rate": 8.475100839166359e-06, + "loss": 0.5924, "step": 3914 }, { - "epoch": 0.41, - "grad_norm": 2.449280579698595, - "learning_rate": 6.638462865551929e-06, - "loss": 0.631, + "epoch": 0.28, + "grad_norm": 1.824779319289794, + "learning_rate": 8.474274507556265e-06, + "loss": 0.5928, "step": 3915 }, { - "epoch": 0.41, - "grad_norm": 2.300143552458361, - "learning_rate": 6.6368526327510515e-06, - "loss": 0.6856, + "epoch": 0.28, + "grad_norm": 1.7551377743456658, + "learning_rate": 8.473447992423223e-06, + "loss": 0.5624, "step": 3916 }, { - "epoch": 0.41, - "grad_norm": 2.1842280323695746, - "learning_rate": 6.635242209785369e-06, - "loss": 0.7335, + "epoch": 0.28, + "grad_norm": 6.120771567426235, + "learning_rate": 8.472621293810886e-06, + "loss": 0.5468, "step": 3917 }, { - "epoch": 0.41, - "grad_norm": 4.49088278011825, - "learning_rate": 6.633631596841972e-06, - "loss": 0.7089, + "epoch": 0.28, + "grad_norm": 0.758453223922965, + "learning_rate": 8.471794411762927e-06, + "loss": 0.4498, "step": 3918 }, { - "epoch": 0.41, - "grad_norm": 2.9551946728374507, - "learning_rate": 6.63202079410798e-06, - "loss": 0.7198, + "epoch": 0.28, + "grad_norm": 4.975516555456688, + "learning_rate": 8.470967346323023e-06, + "loss": 0.601, "step": 3919 }, { - "epoch": 0.41, - "grad_norm": 2.5169889269326773, - "learning_rate": 6.630409801770528e-06, - "loss": 0.6483, + "epoch": 0.28, + "grad_norm": 1.5824313925397464, + "learning_rate": 8.47014009753486e-06, + "loss": 0.5819, "step": 3920 }, { - "epoch": 0.41, - "grad_norm": 2.5244649430685646, - "learning_rate": 6.6287986200167785e-06, - "loss": 0.6174, + "epoch": 0.28, + "grad_norm": 2.662666106825579, + "learning_rate": 8.469312665442141e-06, + "loss": 0.5364, "step": 3921 }, { - "epoch": 0.41, - "grad_norm": 2.6269573809565423, - "learning_rate": 6.627187249033915e-06, - "loss": 0.6274, + "epoch": 0.28, + "grad_norm": 2.3776311231534644, + "learning_rate": 8.468485050088571e-06, + "loss": 0.5791, "step": 3922 }, { - "epoch": 0.41, - "grad_norm": 2.855773301785785, - "learning_rate": 6.6255756890091394e-06, - "loss": 0.6258, + "epoch": 0.28, + "grad_norm": 3.5041814939282467, + "learning_rate": 8.467657251517866e-06, + "loss": 0.5859, "step": 3923 }, { - "epoch": 0.41, - "grad_norm": 2.4211612959153817, - "learning_rate": 6.6239639401296796e-06, - "loss": 0.6101, + "epoch": 0.28, + "grad_norm": 1.9676447211736459, + "learning_rate": 8.466829269773757e-06, + "loss": 0.4731, "step": 3924 }, { - "epoch": 0.41, - "grad_norm": 2.259452031681648, - "learning_rate": 6.622352002582783e-06, - "loss": 0.6934, + "epoch": 0.28, + "grad_norm": 0.8649870130246029, + "learning_rate": 8.466001104899978e-06, + "loss": 0.451, "step": 3925 }, { - "epoch": 0.41, - "grad_norm": 3.0056254388720265, - "learning_rate": 6.620739876555721e-06, - "loss": 0.7024, + "epoch": 0.28, + "grad_norm": 1.7628024025302818, + "learning_rate": 8.465172756940277e-06, + "loss": 0.5385, "step": 3926 }, { - "epoch": 0.41, - "grad_norm": 2.950640775072797, - "learning_rate": 6.619127562235786e-06, - "loss": 0.7432, + "epoch": 0.28, + "grad_norm": 1.7076830321863437, + "learning_rate": 8.464344225938409e-06, + "loss": 0.5693, "step": 3927 }, { - "epoch": 0.41, - "grad_norm": 2.111457506555049, - "learning_rate": 6.61751505981029e-06, - "loss": 0.6785, + "epoch": 0.28, + "grad_norm": 0.768814420952646, + "learning_rate": 8.463515511938139e-06, + "loss": 0.4521, "step": 3928 }, { - "epoch": 0.41, - "grad_norm": 5.205473900545925, - "learning_rate": 6.615902369466571e-06, - "loss": 0.6155, + "epoch": 0.28, + "grad_norm": 2.272510162161342, + "learning_rate": 8.462686614983245e-06, + "loss": 0.5533, "step": 3929 }, { - "epoch": 0.41, - "grad_norm": 1.1442082817667982, - "learning_rate": 6.614289491391985e-06, - "loss": 0.6085, + "epoch": 0.28, + "grad_norm": 1.6502344708222791, + "learning_rate": 8.46185753511751e-06, + "loss": 0.5067, "step": 3930 }, { - "epoch": 0.41, - "grad_norm": 3.097758491032023, - "learning_rate": 6.612676425773914e-06, - "loss": 0.6214, + "epoch": 0.28, + "grad_norm": 1.612801447754268, + "learning_rate": 8.461028272384731e-06, + "loss": 0.5817, "step": 3931 }, { - "epoch": 0.41, - "grad_norm": 4.102972042758122, - "learning_rate": 6.6110631727997566e-06, - "loss": 0.6056, + "epoch": 0.28, + "grad_norm": 1.6191544535317755, + "learning_rate": 8.46019882682871e-06, + "loss": 0.5873, "step": 3932 }, { - "epoch": 0.41, - "grad_norm": 1.9937462472618483, - "learning_rate": 6.609449732656936e-06, - "loss": 0.6597, + "epoch": 0.28, + "grad_norm": 1.8253062870012653, + "learning_rate": 8.459369198493261e-06, + "loss": 0.6302, "step": 3933 }, { - "epoch": 0.41, - "grad_norm": 2.5897301973961366, - "learning_rate": 6.6078361055328986e-06, - "loss": 0.6214, + "epoch": 0.28, + "grad_norm": 1.8381569561678566, + "learning_rate": 8.458539387422211e-06, + "loss": 0.5883, "step": 3934 }, { - "epoch": 0.41, - "grad_norm": 1.0149618917681156, - "learning_rate": 6.606222291615112e-06, - "loss": 0.6023, + "epoch": 0.28, + "grad_norm": 0.8546694064714919, + "learning_rate": 8.45770939365939e-06, + "loss": 0.445, "step": 3935 }, { - "epoch": 0.41, - "grad_norm": 2.253201954300004, - "learning_rate": 6.60460829109106e-06, - "loss": 0.6611, + "epoch": 0.28, + "grad_norm": 1.887344553623164, + "learning_rate": 8.456879217248643e-06, + "loss": 0.5306, "step": 3936 }, { - "epoch": 0.41, - "grad_norm": 2.9592197675312657, - "learning_rate": 6.602994104148256e-06, - "loss": 0.7145, + "epoch": 0.28, + "grad_norm": 1.598145442201589, + "learning_rate": 8.456048858233823e-06, + "loss": 0.5321, "step": 3937 }, { - "epoch": 0.41, - "grad_norm": 2.2284657591030883, - "learning_rate": 6.601379730974231e-06, - "loss": 0.618, + "epoch": 0.28, + "grad_norm": 1.6047801852214043, + "learning_rate": 8.45521831665879e-06, + "loss": 0.5298, "step": 3938 }, { - "epoch": 0.41, - "grad_norm": 4.557356204052841, - "learning_rate": 6.599765171756538e-06, - "loss": 0.6578, + "epoch": 0.28, + "grad_norm": 2.1569446303112563, + "learning_rate": 8.45438759256742e-06, + "loss": 0.5746, "step": 3939 }, { - "epoch": 0.41, - "grad_norm": 6.091674295085208, - "learning_rate": 6.598150426682752e-06, - "loss": 0.6179, + "epoch": 0.28, + "grad_norm": 1.5203002659017415, + "learning_rate": 8.45355668600359e-06, + "loss": 0.5476, "step": 3940 }, { - "epoch": 0.41, - "grad_norm": 2.0618356021886943, - "learning_rate": 6.596535495940468e-06, - "loss": 0.6168, + "epoch": 0.28, + "grad_norm": 1.8265986190107226, + "learning_rate": 8.452725597011196e-06, + "loss": 0.5699, "step": 3941 }, { - "epoch": 0.41, - "grad_norm": 2.4424041643505827, - "learning_rate": 6.594920379717307e-06, - "loss": 0.6277, + "epoch": 0.28, + "grad_norm": 1.3931560788630135, + "learning_rate": 8.451894325634135e-06, + "loss": 0.5585, "step": 3942 }, { - "epoch": 0.41, - "grad_norm": 2.6813309064292485, - "learning_rate": 6.593305078200907e-06, - "loss": 0.6051, + "epoch": 0.28, + "grad_norm": 1.7811256607720838, + "learning_rate": 8.45106287191632e-06, + "loss": 0.6562, "step": 3943 }, { - "epoch": 0.42, - "grad_norm": 2.067586995607296, - "learning_rate": 6.591689591578927e-06, - "loss": 0.631, + "epoch": 0.28, + "grad_norm": 1.5548688668389956, + "learning_rate": 8.45023123590167e-06, + "loss": 0.5232, "step": 3944 }, { - "epoch": 0.42, - "grad_norm": 2.1568079707086967, - "learning_rate": 6.590073920039052e-06, - "loss": 0.6446, + "epoch": 0.28, + "grad_norm": 0.8346341250724972, + "learning_rate": 8.449399417634112e-06, + "loss": 0.4693, "step": 3945 }, { - "epoch": 0.42, - "grad_norm": 2.7031629573317324, - "learning_rate": 6.588458063768985e-06, - "loss": 0.6328, + "epoch": 0.28, + "grad_norm": 1.5912678588200104, + "learning_rate": 8.448567417157593e-06, + "loss": 0.5796, "step": 3946 }, { - "epoch": 0.42, - "grad_norm": 2.406212144524127, - "learning_rate": 6.586842022956453e-06, - "loss": 0.6518, + "epoch": 0.28, + "grad_norm": 1.8086171175386785, + "learning_rate": 8.447735234516055e-06, + "loss": 0.58, "step": 3947 }, { - "epoch": 0.42, - "grad_norm": 4.66916693541006, - "learning_rate": 6.585225797789201e-06, - "loss": 0.6624, + "epoch": 0.28, + "grad_norm": 2.2436880983969254, + "learning_rate": 8.446902869753462e-06, + "loss": 0.5484, "step": 3948 }, { - "epoch": 0.42, - "grad_norm": 2.2582234615243375, - "learning_rate": 6.583609388454998e-06, - "loss": 0.555, + "epoch": 0.28, + "grad_norm": 1.7065584330790715, + "learning_rate": 8.446070322913777e-06, + "loss": 0.5381, "step": 3949 }, { - "epoch": 0.42, - "grad_norm": 2.7199146063706134, - "learning_rate": 6.581992795141634e-06, - "loss": 0.6526, + "epoch": 0.28, + "grad_norm": 1.8642895398490482, + "learning_rate": 8.445237594040982e-06, + "loss": 0.6341, "step": 3950 }, { - "epoch": 0.42, - "grad_norm": 2.5390407294947397, - "learning_rate": 6.580376018036921e-06, - "loss": 0.6671, + "epoch": 0.28, + "grad_norm": 1.8245125139854665, + "learning_rate": 8.444404683179063e-06, + "loss": 0.5531, "step": 3951 }, { - "epoch": 0.42, - "grad_norm": 4.7476904706174, - "learning_rate": 6.578759057328691e-06, - "loss": 0.6829, + "epoch": 0.28, + "grad_norm": 0.7587551332364196, + "learning_rate": 8.443571590372017e-06, + "loss": 0.4666, "step": 3952 }, { - "epoch": 0.42, - "grad_norm": 2.5382661667053528, - "learning_rate": 6.5771419132047965e-06, - "loss": 0.6645, + "epoch": 0.28, + "grad_norm": 1.5948167325764442, + "learning_rate": 8.442738315663852e-06, + "loss": 0.518, "step": 3953 }, { - "epoch": 0.42, - "grad_norm": 2.4043076498085805, - "learning_rate": 6.5755245858531135e-06, - "loss": 0.5974, + "epoch": 0.28, + "grad_norm": 1.616215754833975, + "learning_rate": 8.441904859098583e-06, + "loss": 0.5112, "step": 3954 }, { - "epoch": 0.42, - "grad_norm": 3.0677814480436205, - "learning_rate": 6.573907075461538e-06, - "loss": 0.6752, + "epoch": 0.28, + "grad_norm": 1.8309392390876416, + "learning_rate": 8.441071220720237e-06, + "loss": 0.6067, "step": 3955 }, { - "epoch": 0.42, - "grad_norm": 2.3299465309321636, - "learning_rate": 6.57228938221799e-06, - "loss": 0.7099, + "epoch": 0.28, + "grad_norm": 2.7529402545762207, + "learning_rate": 8.440237400572849e-06, + "loss": 0.563, "step": 3956 }, { - "epoch": 0.42, - "grad_norm": 3.5992623283864615, - "learning_rate": 6.5706715063104065e-06, - "loss": 0.7091, + "epoch": 0.28, + "grad_norm": 1.599523928574332, + "learning_rate": 8.439403398700464e-06, + "loss": 0.508, "step": 3957 }, { - "epoch": 0.42, - "grad_norm": 3.1185187454894776, - "learning_rate": 6.569053447926746e-06, - "loss": 0.5476, + "epoch": 0.28, + "grad_norm": 1.6236762333599375, + "learning_rate": 8.438569215147137e-06, + "loss": 0.5234, "step": 3958 }, { - "epoch": 0.42, - "grad_norm": 3.4670327552541123, - "learning_rate": 6.567435207254993e-06, - "loss": 0.679, + "epoch": 0.28, + "grad_norm": 2.3110297588151867, + "learning_rate": 8.437734849956934e-06, + "loss": 0.5694, "step": 3959 }, { - "epoch": 0.42, - "grad_norm": 4.602615705445102, - "learning_rate": 6.565816784483147e-06, - "loss": 0.6774, + "epoch": 0.28, + "grad_norm": 1.6356690209199236, + "learning_rate": 8.436900303173924e-06, + "loss": 0.6264, "step": 3960 }, { - "epoch": 0.42, - "grad_norm": 5.735033391286663, - "learning_rate": 6.564198179799234e-06, - "loss": 0.6466, + "epoch": 0.28, + "grad_norm": 2.3535695264636787, + "learning_rate": 8.436065574842198e-06, + "loss": 0.6027, "step": 3961 }, { - "epoch": 0.42, - "grad_norm": 3.8877061902133954, - "learning_rate": 6.5625793933912985e-06, - "loss": 0.594, + "epoch": 0.28, + "grad_norm": 1.8487447420515397, + "learning_rate": 8.435230665005845e-06, + "loss": 0.5813, "step": 3962 }, { - "epoch": 0.42, - "grad_norm": 2.814956124692221, - "learning_rate": 6.5609604254474065e-06, - "loss": 0.6185, + "epoch": 0.28, + "grad_norm": 1.6676239186164008, + "learning_rate": 8.434395573708967e-06, + "loss": 0.6328, "step": 3963 }, { - "epoch": 0.42, - "grad_norm": 1.1484699815057058, - "learning_rate": 6.559341276155644e-06, - "loss": 0.6048, + "epoch": 0.28, + "grad_norm": 2.0493847866688935, + "learning_rate": 8.433560300995677e-06, + "loss": 0.4732, "step": 3964 }, { - "epoch": 0.42, - "grad_norm": 2.872534078697945, - "learning_rate": 6.55772194570412e-06, - "loss": 0.6446, + "epoch": 0.28, + "grad_norm": 1.8108122335139474, + "learning_rate": 8.432724846910099e-06, + "loss": 0.5873, "step": 3965 }, { - "epoch": 0.42, - "grad_norm": 5.229090156228293, - "learning_rate": 6.5561024342809625e-06, - "loss": 0.6325, + "epoch": 0.28, + "grad_norm": 1.5987070775906682, + "learning_rate": 8.431889211496363e-06, + "loss": 0.5984, "step": 3966 }, { - "epoch": 0.42, - "grad_norm": 2.5077624702277195, - "learning_rate": 6.554482742074323e-06, - "loss": 0.633, + "epoch": 0.28, + "grad_norm": 1.6288448604635806, + "learning_rate": 8.431053394798609e-06, + "loss": 0.4614, "step": 3967 }, { - "epoch": 0.42, - "grad_norm": 2.6547530593774695, - "learning_rate": 6.552862869272371e-06, - "loss": 0.6035, + "epoch": 0.28, + "grad_norm": 2.380907759171672, + "learning_rate": 8.43021739686099e-06, + "loss": 0.5924, "step": 3968 }, { - "epoch": 0.42, - "grad_norm": 3.367826287135605, - "learning_rate": 6.551242816063302e-06, - "loss": 0.6763, + "epoch": 0.28, + "grad_norm": 1.661946727218087, + "learning_rate": 8.429381217727663e-06, + "loss": 0.5288, "step": 3969 }, { - "epoch": 0.42, - "grad_norm": 2.4066120293512174, - "learning_rate": 6.5496225826353254e-06, - "loss": 0.6515, + "epoch": 0.28, + "grad_norm": 0.8258645136090841, + "learning_rate": 8.4285448574428e-06, + "loss": 0.4597, "step": 3970 }, { - "epoch": 0.42, - "grad_norm": 2.5260789922901496, - "learning_rate": 6.548002169176677e-06, - "loss": 0.651, + "epoch": 0.28, + "grad_norm": 1.797905676763181, + "learning_rate": 8.42770831605058e-06, + "loss": 0.5756, "step": 3971 }, { - "epoch": 0.42, - "grad_norm": 2.3066437486350457, - "learning_rate": 6.54638157587561e-06, - "loss": 0.5797, + "epoch": 0.28, + "grad_norm": 1.6991997043360314, + "learning_rate": 8.426871593595193e-06, + "loss": 0.5835, "step": 3972 }, { - "epoch": 0.42, - "grad_norm": 2.181813637695597, - "learning_rate": 6.544760802920402e-06, - "loss": 0.6741, + "epoch": 0.28, + "grad_norm": 1.677133852266488, + "learning_rate": 8.426034690120837e-06, + "loss": 0.5363, "step": 3973 }, { - "epoch": 0.42, - "grad_norm": 3.3250608385498466, - "learning_rate": 6.543139850499352e-06, - "loss": 0.6409, + "epoch": 0.28, + "grad_norm": 1.8752645584682852, + "learning_rate": 8.425197605671717e-06, + "loss": 0.5359, "step": 3974 }, { - "epoch": 0.42, - "grad_norm": 2.3644447611901835, - "learning_rate": 6.5415187188007726e-06, - "loss": 0.6754, + "epoch": 0.28, + "grad_norm": 1.8895212098596288, + "learning_rate": 8.424360340292057e-06, + "loss": 0.5312, "step": 3975 }, { - "epoch": 0.42, - "grad_norm": 2.405053664018577, - "learning_rate": 6.539897408013005e-06, - "loss": 0.6084, + "epoch": 0.28, + "grad_norm": 1.7878730197700838, + "learning_rate": 8.42352289402608e-06, + "loss": 0.6166, "step": 3976 }, { - "epoch": 0.42, - "grad_norm": 2.1188551440111305, - "learning_rate": 6.538275918324408e-06, - "loss": 0.7079, + "epoch": 0.28, + "grad_norm": 0.7350754508180388, + "learning_rate": 8.42268526691802e-06, + "loss": 0.4804, "step": 3977 }, { - "epoch": 0.42, - "grad_norm": 2.439917462899966, - "learning_rate": 6.536654249923361e-06, - "loss": 0.6137, + "epoch": 0.28, + "grad_norm": 1.4942816717868859, + "learning_rate": 8.42184745901213e-06, + "loss": 0.4688, "step": 3978 }, { - "epoch": 0.42, - "grad_norm": 2.6239270579211404, - "learning_rate": 6.535032402998266e-06, - "loss": 0.5237, + "epoch": 0.28, + "grad_norm": 1.4579405012475048, + "learning_rate": 8.421009470352662e-06, + "loss": 0.5443, "step": 3979 }, { - "epoch": 0.42, - "grad_norm": 3.4664442974636303, - "learning_rate": 6.533410377737544e-06, - "loss": 0.6955, + "epoch": 0.28, + "grad_norm": 1.6309329050698778, + "learning_rate": 8.42017130098388e-06, + "loss": 0.564, "step": 3980 }, { - "epoch": 0.42, - "grad_norm": 2.3044985882207807, - "learning_rate": 6.531788174329636e-06, - "loss": 0.5246, + "epoch": 0.28, + "grad_norm": 1.6936002756984099, + "learning_rate": 8.419332950950061e-06, + "loss": 0.5474, "step": 3981 }, { - "epoch": 0.42, - "grad_norm": 2.007003515117586, - "learning_rate": 6.5301657929630055e-06, - "loss": 0.6039, + "epoch": 0.28, + "grad_norm": 1.5813604606549683, + "learning_rate": 8.418494420295491e-06, + "loss": 0.5778, "step": 3982 }, { - "epoch": 0.42, - "grad_norm": 1.2164380112159745, - "learning_rate": 6.5285432338261365e-06, - "loss": 0.616, + "epoch": 0.28, + "grad_norm": 1.9129866474477308, + "learning_rate": 8.417655709064461e-06, + "loss": 0.5822, "step": 3983 }, { - "epoch": 0.42, - "grad_norm": 2.257824172908572, - "learning_rate": 6.526920497107535e-06, - "loss": 0.7186, + "epoch": 0.28, + "grad_norm": 1.610502769212203, + "learning_rate": 8.416816817301275e-06, + "loss": 0.5782, "step": 3984 }, { - "epoch": 0.42, - "grad_norm": 1.093528093998501, - "learning_rate": 6.525297582995722e-06, - "loss": 0.5979, + "epoch": 0.28, + "grad_norm": 1.7316237703167854, + "learning_rate": 8.415977745050247e-06, + "loss": 0.5445, "step": 3985 }, { - "epoch": 0.42, - "grad_norm": 2.3370494508343467, - "learning_rate": 6.523674491679246e-06, - "loss": 0.639, + "epoch": 0.28, + "grad_norm": 2.2661168274225485, + "learning_rate": 8.415138492355703e-06, + "loss": 0.5267, "step": 3986 }, { - "epoch": 0.42, - "grad_norm": 3.1729179222230477, - "learning_rate": 6.522051223346672e-06, - "loss": 0.6788, + "epoch": 0.28, + "grad_norm": 1.8503971759713278, + "learning_rate": 8.414299059261968e-06, + "loss": 0.6599, "step": 3987 }, { - "epoch": 0.42, - "grad_norm": 4.462532611308276, - "learning_rate": 6.520427778186586e-06, - "loss": 0.7091, + "epoch": 0.28, + "grad_norm": 1.921606759347505, + "learning_rate": 8.41345944581339e-06, + "loss": 0.5801, "step": 3988 }, { - "epoch": 0.42, - "grad_norm": 2.673478312971706, - "learning_rate": 6.518804156387597e-06, - "loss": 0.6831, + "epoch": 0.28, + "grad_norm": 1.518327733287103, + "learning_rate": 8.412619652054316e-06, + "loss": 0.5137, "step": 3989 }, { - "epoch": 0.42, - "grad_norm": 2.9158479320514985, - "learning_rate": 6.517180358138332e-06, - "loss": 0.6926, + "epoch": 0.28, + "grad_norm": 1.7401412001000505, + "learning_rate": 8.411779678029109e-06, + "loss": 0.5586, "step": 3990 }, { - "epoch": 0.42, - "grad_norm": 2.635411987857093, - "learning_rate": 6.515556383627437e-06, - "loss": 0.666, + "epoch": 0.28, + "grad_norm": 1.6689117094180324, + "learning_rate": 8.410939523782138e-06, + "loss": 0.5798, "step": 3991 }, { - "epoch": 0.42, - "grad_norm": 2.2626676614651906, - "learning_rate": 6.513932233043584e-06, - "loss": 0.7135, + "epoch": 0.28, + "grad_norm": 1.608921073458193, + "learning_rate": 8.410099189357783e-06, + "loss": 0.549, "step": 3992 }, { - "epoch": 0.42, - "grad_norm": 2.83263520238375, - "learning_rate": 6.512307906575459e-06, - "loss": 0.5915, + "epoch": 0.28, + "grad_norm": 0.8746926953931841, + "learning_rate": 8.409258674800437e-06, + "loss": 0.4551, "step": 3993 }, { - "epoch": 0.42, - "grad_norm": 2.204046953504845, - "learning_rate": 6.510683404411774e-06, - "loss": 0.6271, + "epoch": 0.28, + "grad_norm": 0.7688932404609239, + "learning_rate": 8.408417980154492e-06, + "loss": 0.4491, "step": 3994 }, { - "epoch": 0.42, - "grad_norm": 2.593299117358978, - "learning_rate": 6.509058726741258e-06, - "loss": 0.6616, + "epoch": 0.28, + "grad_norm": 1.6736915090796793, + "learning_rate": 8.407577105464363e-06, + "loss": 0.6347, "step": 3995 }, { - "epoch": 0.42, - "grad_norm": 1.996690826929998, - "learning_rate": 6.50743387375266e-06, - "loss": 0.6885, + "epoch": 0.28, + "grad_norm": 1.834201287544821, + "learning_rate": 8.406736050774462e-06, + "loss": 0.5363, "step": 3996 }, { - "epoch": 0.42, - "grad_norm": 12.933011472212376, - "learning_rate": 6.505808845634753e-06, - "loss": 0.6603, + "epoch": 0.28, + "grad_norm": 1.5855171945393083, + "learning_rate": 8.405894816129222e-06, + "loss": 0.5663, "step": 3997 }, { - "epoch": 0.42, - "grad_norm": 2.453187947445511, - "learning_rate": 6.504183642576327e-06, - "loss": 0.7046, + "epoch": 0.28, + "grad_norm": 2.52855010657995, + "learning_rate": 8.405053401573075e-06, + "loss": 0.5572, "step": 3998 }, { - "epoch": 0.42, - "grad_norm": 2.4465722034806157, - "learning_rate": 6.502558264766194e-06, - "loss": 0.5633, + "epoch": 0.28, + "grad_norm": 1.6939178825061323, + "learning_rate": 8.40421180715047e-06, + "loss": 0.553, "step": 3999 }, { - "epoch": 0.42, - "grad_norm": 2.341833452210207, - "learning_rate": 6.500932712393185e-06, - "loss": 0.6048, + "epoch": 0.28, + "grad_norm": 1.783722669013582, + "learning_rate": 8.403370032905862e-06, + "loss": 0.5737, "step": 4000 }, { - "epoch": 0.42, - "grad_norm": 2.5314039211160955, - "learning_rate": 6.499306985646152e-06, - "loss": 0.6565, + "epoch": 0.28, + "grad_norm": 2.573082758748703, + "learning_rate": 8.402528078883719e-06, + "loss": 0.5973, "step": 4001 }, { - "epoch": 0.42, - "grad_norm": 2.307652014877147, - "learning_rate": 6.497681084713969e-06, - "loss": 0.6175, + "epoch": 0.28, + "grad_norm": 1.692190853386022, + "learning_rate": 8.40168594512851e-06, + "loss": 0.6174, "step": 4002 }, { - "epoch": 0.42, - "grad_norm": 3.7936798318747624, - "learning_rate": 6.496055009785526e-06, - "loss": 0.6992, + "epoch": 0.28, + "grad_norm": 0.8822906433063223, + "learning_rate": 8.400843631684726e-06, + "loss": 0.4572, "step": 4003 }, { - "epoch": 0.42, - "grad_norm": 2.6272219714248983, - "learning_rate": 6.494428761049736e-06, - "loss": 0.6342, + "epoch": 0.28, + "grad_norm": 1.6965229441380114, + "learning_rate": 8.400001138596856e-06, + "loss": 0.5511, "step": 4004 }, { - "epoch": 0.42, - "grad_norm": 2.1079646961470906, - "learning_rate": 6.492802338695533e-06, - "loss": 0.6606, + "epoch": 0.28, + "grad_norm": 1.4713655506288976, + "learning_rate": 8.399158465909405e-06, + "loss": 0.5387, "step": 4005 }, { - "epoch": 0.42, - "grad_norm": 5.297324004495899, - "learning_rate": 6.491175742911869e-06, - "loss": 0.6246, + "epoch": 0.28, + "grad_norm": 1.3919539899412228, + "learning_rate": 8.398315613666888e-06, + "loss": 0.5249, "step": 4006 }, { - "epoch": 0.42, - "grad_norm": 2.6530141636498663, - "learning_rate": 6.489548973887717e-06, - "loss": 0.6609, + "epoch": 0.28, + "grad_norm": 1.7070397285381365, + "learning_rate": 8.39747258191382e-06, + "loss": 0.6144, "step": 4007 }, { - "epoch": 0.42, - "grad_norm": 3.1063440835651397, - "learning_rate": 6.4879220318120735e-06, - "loss": 0.6974, + "epoch": 0.28, + "grad_norm": 1.5458134207901157, + "learning_rate": 8.396629370694745e-06, + "loss": 0.5575, "step": 4008 }, { - "epoch": 0.42, - "grad_norm": 2.3068250586790517, - "learning_rate": 6.486294916873947e-06, - "loss": 0.6624, + "epoch": 0.28, + "grad_norm": 1.7808046038645111, + "learning_rate": 8.395785980054194e-06, + "loss": 0.5256, "step": 4009 }, { - "epoch": 0.42, - "grad_norm": 2.459942839783369, - "learning_rate": 6.484667629262375e-06, - "loss": 0.6707, + "epoch": 0.28, + "grad_norm": 1.432172348267557, + "learning_rate": 8.39494241003672e-06, + "loss": 0.5951, "step": 4010 }, { - "epoch": 0.42, - "grad_norm": 2.1027394322760666, - "learning_rate": 6.4830401691664106e-06, - "loss": 0.6924, + "epoch": 0.28, + "grad_norm": 1.9276590628837833, + "learning_rate": 8.394098660686884e-06, + "loss": 0.5247, "step": 4011 }, { - "epoch": 0.42, - "grad_norm": 1.1296970727448212, - "learning_rate": 6.481412536775125e-06, - "loss": 0.5682, + "epoch": 0.28, + "grad_norm": 1.6475198730491059, + "learning_rate": 8.393254732049259e-06, + "loss": 0.5727, "step": 4012 }, { - "epoch": 0.42, - "grad_norm": 2.62301953143833, - "learning_rate": 6.479784732277612e-06, - "loss": 0.6738, + "epoch": 0.28, + "grad_norm": 1.5890425304382039, + "learning_rate": 8.39241062416842e-06, + "loss": 0.603, "step": 4013 }, { - "epoch": 0.42, - "grad_norm": 2.8179450664566996, - "learning_rate": 6.478156755862988e-06, - "loss": 0.6378, + "epoch": 0.28, + "grad_norm": 1.5224802825709185, + "learning_rate": 8.391566337088955e-06, + "loss": 0.5591, "step": 4014 }, { - "epoch": 0.42, - "grad_norm": 2.7307924595408544, - "learning_rate": 6.4765286077203844e-06, - "loss": 0.5877, + "epoch": 0.28, + "grad_norm": 1.8771937857811312, + "learning_rate": 8.390721870855464e-06, + "loss": 0.6913, "step": 4015 }, { - "epoch": 0.42, - "grad_norm": 4.571571986675738, - "learning_rate": 6.4749002880389566e-06, - "loss": 0.6638, + "epoch": 0.28, + "grad_norm": 1.6959555912894153, + "learning_rate": 8.389877225512556e-06, + "loss": 0.5441, "step": 4016 }, { - "epoch": 0.42, - "grad_norm": 2.603346420805311, - "learning_rate": 6.473271797007876e-06, - "loss": 0.6619, + "epoch": 0.29, + "grad_norm": 4.129041137736984, + "learning_rate": 8.389032401104845e-06, + "loss": 0.6159, "step": 4017 }, { - "epoch": 0.42, - "grad_norm": 2.174453465113902, - "learning_rate": 6.471643134816336e-06, - "loss": 0.7298, + "epoch": 0.29, + "grad_norm": 1.8311820613043999, + "learning_rate": 8.388187397676961e-06, + "loss": 0.5701, "step": 4018 }, { - "epoch": 0.42, - "grad_norm": 2.5043155473625514, - "learning_rate": 6.470014301653552e-06, - "loss": 0.6076, + "epoch": 0.29, + "grad_norm": 1.6698919660467186, + "learning_rate": 8.387342215273535e-06, + "loss": 0.5757, "step": 4019 }, { - "epoch": 0.42, - "grad_norm": 2.8733996645916116, - "learning_rate": 6.468385297708755e-06, - "loss": 0.6422, + "epoch": 0.29, + "grad_norm": 0.8299990858324686, + "learning_rate": 8.38649685393922e-06, + "loss": 0.4834, "step": 4020 }, { - "epoch": 0.42, - "grad_norm": 1.0053641792809447, - "learning_rate": 6.466756123171199e-06, - "loss": 0.6017, + "epoch": 0.29, + "grad_norm": 1.6203114327810626, + "learning_rate": 8.385651313718662e-06, + "loss": 0.5173, "step": 4021 }, { - "epoch": 0.42, - "grad_norm": 2.170505948062594, - "learning_rate": 6.465126778230156e-06, - "loss": 0.6564, + "epoch": 0.29, + "grad_norm": 1.808593882326435, + "learning_rate": 8.384805594656528e-06, + "loss": 0.6565, "step": 4022 }, { - "epoch": 0.42, - "grad_norm": 2.2632578787097453, - "learning_rate": 6.46349726307492e-06, - "loss": 0.6804, + "epoch": 0.29, + "grad_norm": 1.893872488302302, + "learning_rate": 8.383959696797498e-06, + "loss": 0.5142, "step": 4023 }, { - "epoch": 0.42, - "grad_norm": 2.936556398536724, - "learning_rate": 6.4618675778948026e-06, - "loss": 0.6739, + "epoch": 0.29, + "grad_norm": 1.6008374920711723, + "learning_rate": 8.383113620186247e-06, + "loss": 0.5952, "step": 4024 }, { - "epoch": 0.42, - "grad_norm": 2.1638172540039973, - "learning_rate": 6.4602377228791345e-06, - "loss": 0.6198, + "epoch": 0.29, + "grad_norm": 1.8134143618797658, + "learning_rate": 8.382267364867473e-06, + "loss": 0.6024, "step": 4025 }, { - "epoch": 0.42, - "grad_norm": 2.599114630574275, - "learning_rate": 6.458607698217271e-06, - "loss": 0.5974, + "epoch": 0.29, + "grad_norm": 1.8130005739396458, + "learning_rate": 8.381420930885876e-06, + "loss": 0.6091, "step": 4026 }, { - "epoch": 0.42, - "grad_norm": 3.359851858598546, - "learning_rate": 6.45697750409858e-06, - "loss": 0.6207, + "epoch": 0.29, + "grad_norm": 0.7948458039177386, + "learning_rate": 8.380574318286168e-06, + "loss": 0.4674, "step": 4027 }, { - "epoch": 0.42, - "grad_norm": 1.0414819211011148, - "learning_rate": 6.455347140712455e-06, - "loss": 0.5482, + "epoch": 0.29, + "grad_norm": 1.441637244693293, + "learning_rate": 8.379727527113066e-06, + "loss": 0.5354, "step": 4028 }, { - "epoch": 0.42, - "grad_norm": 1.9073262379332099, - "learning_rate": 6.453716608248306e-06, - "loss": 0.7179, + "epoch": 0.29, + "grad_norm": 1.5052493964780909, + "learning_rate": 8.378880557411309e-06, + "loss": 0.4993, "step": 4029 }, { - "epoch": 0.42, - "grad_norm": 2.894734987817326, - "learning_rate": 6.452085906895564e-06, - "loss": 0.6275, + "epoch": 0.29, + "grad_norm": 2.036816078459009, + "learning_rate": 8.378033409225627e-06, + "loss": 0.5297, "step": 4030 }, { - "epoch": 0.42, - "grad_norm": 6.409131230764491, - "learning_rate": 6.450455036843677e-06, - "loss": 0.5708, + "epoch": 0.29, + "grad_norm": 1.6309720788559503, + "learning_rate": 8.377186082600776e-06, + "loss": 0.5241, "step": 4031 }, { - "epoch": 0.42, - "grad_norm": 2.626357432074768, - "learning_rate": 6.448823998282119e-06, - "loss": 0.6892, + "epoch": 0.29, + "grad_norm": 1.6016800468588481, + "learning_rate": 8.376338577581513e-06, + "loss": 0.5029, "step": 4032 }, { - "epoch": 0.42, - "grad_norm": 2.3318787973963424, - "learning_rate": 6.447192791400375e-06, - "loss": 0.6684, + "epoch": 0.29, + "grad_norm": 1.5833455797845262, + "learning_rate": 8.375490894212607e-06, + "loss": 0.5827, "step": 4033 }, { - "epoch": 0.42, - "grad_norm": 2.341295423290045, - "learning_rate": 6.4455614163879576e-06, - "loss": 0.7042, + "epoch": 0.29, + "grad_norm": 1.5817555522493298, + "learning_rate": 8.374643032538832e-06, + "loss": 0.5525, "step": 4034 }, { - "epoch": 0.42, - "grad_norm": 2.51877291354241, - "learning_rate": 6.443929873434392e-06, - "loss": 0.6783, + "epoch": 0.29, + "grad_norm": 1.6676419961539768, + "learning_rate": 8.373794992604978e-06, + "loss": 0.5971, "step": 4035 }, { - "epoch": 0.42, - "grad_norm": 2.530723558434043, - "learning_rate": 6.442298162729229e-06, - "loss": 0.652, + "epoch": 0.29, + "grad_norm": 1.6407101931611179, + "learning_rate": 8.37294677445584e-06, + "loss": 0.5307, "step": 4036 }, { - "epoch": 0.42, - "grad_norm": 2.544615833177302, - "learning_rate": 6.4406662844620346e-06, - "loss": 0.6204, + "epoch": 0.29, + "grad_norm": 1.7829747704607677, + "learning_rate": 8.372098378136226e-06, + "loss": 0.5007, "step": 4037 }, { - "epoch": 0.42, - "grad_norm": 2.6992866068672643, - "learning_rate": 6.439034238822396e-06, - "loss": 0.6114, + "epoch": 0.29, + "grad_norm": 1.6369429228617751, + "learning_rate": 8.371249803690948e-06, + "loss": 0.5957, "step": 4038 }, { - "epoch": 0.43, - "grad_norm": 2.361537811446634, - "learning_rate": 6.4374020259999194e-06, - "loss": 0.6084, + "epoch": 0.29, + "grad_norm": 1.7893182738082392, + "learning_rate": 8.370401051164832e-06, + "loss": 0.5748, "step": 4039 }, { - "epoch": 0.43, - "grad_norm": 3.3932419299570737, - "learning_rate": 6.43576964618423e-06, - "loss": 0.5435, + "epoch": 0.29, + "grad_norm": 0.9998529409766825, + "learning_rate": 8.369552120602713e-06, + "loss": 0.4653, "step": 4040 }, { - "epoch": 0.43, - "grad_norm": 2.2247947497001896, - "learning_rate": 6.4341370995649735e-06, - "loss": 0.6274, + "epoch": 0.29, + "grad_norm": 1.6014843490147543, + "learning_rate": 8.368703012049432e-06, + "loss": 0.5645, "step": 4041 }, { - "epoch": 0.43, - "grad_norm": 4.203698930496953, - "learning_rate": 6.4325043863318136e-06, - "loss": 0.6563, + "epoch": 0.29, + "grad_norm": 1.602674383453791, + "learning_rate": 8.367853725549844e-06, + "loss": 0.5021, "step": 4042 }, { - "epoch": 0.43, - "grad_norm": 2.6533644667178184, - "learning_rate": 6.430871506674437e-06, - "loss": 0.5898, + "epoch": 0.29, + "grad_norm": 2.18057624731136, + "learning_rate": 8.36700426114881e-06, + "loss": 0.5328, "step": 4043 }, { - "epoch": 0.43, - "grad_norm": 3.265730806748852, - "learning_rate": 6.429238460782543e-06, - "loss": 0.5698, + "epoch": 0.29, + "grad_norm": 1.7475455916751526, + "learning_rate": 8.366154618891203e-06, + "loss": 0.5619, "step": 4044 }, { - "epoch": 0.43, - "grad_norm": 2.1642899955947548, - "learning_rate": 6.427605248845859e-06, - "loss": 0.6199, + "epoch": 0.29, + "grad_norm": 0.8193706706661844, + "learning_rate": 8.3653047988219e-06, + "loss": 0.4551, "step": 4045 }, { - "epoch": 0.43, - "grad_norm": 2.6003134035460604, - "learning_rate": 6.42597187105412e-06, - "loss": 0.6055, + "epoch": 0.29, + "grad_norm": 1.9166170012950614, + "learning_rate": 8.364454800985797e-06, + "loss": 0.5481, "step": 4046 }, { - "epoch": 0.43, - "grad_norm": 2.2193001480951926, - "learning_rate": 6.4243383275970924e-06, - "loss": 0.671, + "epoch": 0.29, + "grad_norm": 1.6903385298304876, + "learning_rate": 8.36360462542779e-06, + "loss": 0.5279, "step": 4047 }, { - "epoch": 0.43, - "grad_norm": 3.5572778898263016, - "learning_rate": 6.422704618664557e-06, - "loss": 0.6435, + "epoch": 0.29, + "grad_norm": 1.706970159522442, + "learning_rate": 8.36275427219279e-06, + "loss": 0.5683, "step": 4048 }, { - "epoch": 0.43, - "grad_norm": 4.222157891641835, - "learning_rate": 6.42107074444631e-06, - "loss": 0.6501, + "epoch": 0.29, + "grad_norm": 1.9399402860496497, + "learning_rate": 8.361903741325716e-06, + "loss": 0.6469, "step": 4049 }, { - "epoch": 0.43, - "grad_norm": 2.725601175114304, - "learning_rate": 6.419436705132172e-06, - "loss": 0.6936, + "epoch": 0.29, + "grad_norm": 1.4882637441868254, + "learning_rate": 8.361053032871494e-06, + "loss": 0.5279, "step": 4050 }, { - "epoch": 0.43, - "grad_norm": 2.4112413353819484, - "learning_rate": 6.4178025009119825e-06, - "loss": 0.6389, + "epoch": 0.29, + "grad_norm": 1.4501051210945841, + "learning_rate": 8.36020214687506e-06, + "loss": 0.6091, "step": 4051 }, { - "epoch": 0.43, - "grad_norm": 2.589327647378624, - "learning_rate": 6.416168131975595e-06, - "loss": 0.6985, + "epoch": 0.29, + "grad_norm": 0.811187473831592, + "learning_rate": 8.359351083381364e-06, + "loss": 0.4465, "step": 4052 }, { - "epoch": 0.43, - "grad_norm": 2.4983806609239703, - "learning_rate": 6.414533598512887e-06, - "loss": 0.6664, + "epoch": 0.29, + "grad_norm": 4.563611274422315, + "learning_rate": 8.358499842435361e-06, + "loss": 0.5835, "step": 4053 }, { - "epoch": 0.43, - "grad_norm": 2.240075636586892, - "learning_rate": 6.412898900713757e-06, - "loss": 0.6559, + "epoch": 0.29, + "grad_norm": 0.8685622488220026, + "learning_rate": 8.357648424082015e-06, + "loss": 0.4611, "step": 4054 }, { - "epoch": 0.43, - "grad_norm": 2.8847812663112897, - "learning_rate": 6.411264038768115e-06, - "loss": 0.6703, + "epoch": 0.29, + "grad_norm": 1.474062514149165, + "learning_rate": 8.356796828366303e-06, + "loss": 0.5211, "step": 4055 }, { - "epoch": 0.43, - "grad_norm": 2.367558885670087, - "learning_rate": 6.409629012865899e-06, - "loss": 0.5792, + "epoch": 0.29, + "grad_norm": 1.3786668605389012, + "learning_rate": 8.355945055333207e-06, + "loss": 0.5413, "step": 4056 }, { - "epoch": 0.43, - "grad_norm": 3.3855942924416382, - "learning_rate": 6.407993823197056e-06, - "loss": 0.7401, + "epoch": 0.29, + "grad_norm": 2.1682400213946407, + "learning_rate": 8.355093105027723e-06, + "loss": 0.6121, "step": 4057 }, { - "epoch": 0.43, - "grad_norm": 3.876331931348521, - "learning_rate": 6.406358469951562e-06, - "loss": 0.6485, + "epoch": 0.29, + "grad_norm": 1.7047341521217991, + "learning_rate": 8.354240977494852e-06, + "loss": 0.6238, "step": 4058 }, { - "epoch": 0.43, - "grad_norm": 2.206685527616314, - "learning_rate": 6.404722953319406e-06, - "loss": 0.6424, + "epoch": 0.29, + "grad_norm": 1.7724118439647396, + "learning_rate": 8.353388672779607e-06, + "loss": 0.5339, "step": 4059 }, { - "epoch": 0.43, - "grad_norm": 2.075266887505169, - "learning_rate": 6.403087273490599e-06, - "loss": 0.6713, + "epoch": 0.29, + "grad_norm": 1.6951343825977745, + "learning_rate": 8.35253619092701e-06, + "loss": 0.563, "step": 4060 }, { - "epoch": 0.43, - "grad_norm": 2.856229721355921, - "learning_rate": 6.401451430655168e-06, - "loss": 0.628, + "epoch": 0.29, + "grad_norm": 1.9331577950577106, + "learning_rate": 8.35168353198209e-06, + "loss": 0.5805, "step": 4061 }, { - "epoch": 0.43, - "grad_norm": 2.0646056376603608, - "learning_rate": 6.399815425003161e-06, - "loss": 0.6839, + "epoch": 0.29, + "grad_norm": 1.6835839625034101, + "learning_rate": 8.350830695989892e-06, + "loss": 0.5152, "step": 4062 }, { - "epoch": 0.43, - "grad_norm": 2.0561582164002683, - "learning_rate": 6.398179256724644e-06, - "loss": 0.6327, + "epoch": 0.29, + "grad_norm": 0.7828155323290966, + "learning_rate": 8.34997768299546e-06, + "loss": 0.4543, "step": 4063 }, { - "epoch": 0.43, - "grad_norm": 2.1686914862568094, - "learning_rate": 6.396542926009703e-06, - "loss": 0.5924, + "epoch": 0.29, + "grad_norm": 1.7340155865912827, + "learning_rate": 8.349124493043853e-06, + "loss": 0.6282, "step": 4064 }, { - "epoch": 0.43, - "grad_norm": 2.452702288596918, - "learning_rate": 6.394906433048442e-06, - "loss": 0.6387, + "epoch": 0.29, + "grad_norm": 1.6893213295076277, + "learning_rate": 8.348271126180147e-06, + "loss": 0.5495, "step": 4065 }, { - "epoch": 0.43, - "grad_norm": 3.400913216633786, - "learning_rate": 6.3932697780309825e-06, - "loss": 0.6913, + "epoch": 0.29, + "grad_norm": 1.720498452467447, + "learning_rate": 8.347417582449411e-06, + "loss": 0.5467, "step": 4066 }, { - "epoch": 0.43, - "grad_norm": 2.108636102788443, - "learning_rate": 6.3916329611474705e-06, - "loss": 0.6406, + "epoch": 0.29, + "grad_norm": 2.5315930944678313, + "learning_rate": 8.346563861896737e-06, + "loss": 0.4787, "step": 4067 }, { - "epoch": 0.43, - "grad_norm": 2.411671185444273, - "learning_rate": 6.389995982588061e-06, - "loss": 0.6552, + "epoch": 0.29, + "grad_norm": 1.7561425127838546, + "learning_rate": 8.345709964567222e-06, + "loss": 0.579, "step": 4068 }, { - "epoch": 0.43, - "grad_norm": 2.186150326143783, - "learning_rate": 6.388358842542939e-06, - "loss": 0.6234, + "epoch": 0.29, + "grad_norm": 1.616136042265237, + "learning_rate": 8.344855890505967e-06, + "loss": 0.5247, "step": 4069 }, { - "epoch": 0.43, - "grad_norm": 4.859739939616308, - "learning_rate": 6.386721541202296e-06, - "loss": 0.6838, + "epoch": 0.29, + "grad_norm": 1.60879726860104, + "learning_rate": 8.344001639758091e-06, + "loss": 0.6333, "step": 4070 }, { - "epoch": 0.43, - "grad_norm": 2.460365538292539, - "learning_rate": 6.3850840787563565e-06, - "loss": 0.5673, + "epoch": 0.29, + "grad_norm": 1.7208072487855657, + "learning_rate": 8.343147212368716e-06, + "loss": 0.6693, "step": 4071 }, { - "epoch": 0.43, - "grad_norm": 7.402069347480076, - "learning_rate": 6.383446455395352e-06, - "loss": 0.6837, + "epoch": 0.29, + "grad_norm": 2.012918535297233, + "learning_rate": 8.342292608382977e-06, + "loss": 0.569, "step": 4072 }, { - "epoch": 0.43, - "grad_norm": 3.2533740088151664, - "learning_rate": 6.3818086713095374e-06, - "loss": 0.6788, + "epoch": 0.29, + "grad_norm": 1.6070901485426803, + "learning_rate": 8.34143782784602e-06, + "loss": 0.6281, "step": 4073 }, { - "epoch": 0.43, - "grad_norm": 2.2858335624830324, - "learning_rate": 6.380170726689185e-06, - "loss": 0.7238, + "epoch": 0.29, + "grad_norm": 1.7723969564215614, + "learning_rate": 8.340582870802994e-06, + "loss": 0.5468, "step": 4074 }, { - "epoch": 0.43, - "grad_norm": 2.484411238766058, - "learning_rate": 6.378532621724588e-06, - "loss": 0.6292, + "epoch": 0.29, + "grad_norm": 1.7306295364803443, + "learning_rate": 8.33972773729906e-06, + "loss": 0.5522, "step": 4075 }, { - "epoch": 0.43, - "grad_norm": 8.867064268787102, - "learning_rate": 6.376894356606056e-06, - "loss": 0.5916, + "epoch": 0.29, + "grad_norm": 1.6071001374093452, + "learning_rate": 8.33887242737939e-06, + "loss": 0.6152, "step": 4076 }, { - "epoch": 0.43, - "grad_norm": 2.291422789683661, - "learning_rate": 6.375255931523916e-06, - "loss": 0.6287, + "epoch": 0.29, + "grad_norm": 1.7598936414064212, + "learning_rate": 8.338016941089167e-06, + "loss": 0.5662, "step": 4077 }, { - "epoch": 0.43, - "grad_norm": 2.1675402432820405, - "learning_rate": 6.373617346668519e-06, - "loss": 0.5687, + "epoch": 0.29, + "grad_norm": 2.1568654433272063, + "learning_rate": 8.337161278473576e-06, + "loss": 0.5323, "step": 4078 }, { - "epoch": 0.43, - "grad_norm": 2.2952408596599088, - "learning_rate": 6.371978602230229e-06, - "loss": 0.6288, + "epoch": 0.29, + "grad_norm": 2.0037071747572317, + "learning_rate": 8.33630543957782e-06, + "loss": 0.6723, "step": 4079 }, { - "epoch": 0.43, - "grad_norm": 4.141696812562484, - "learning_rate": 6.370339698399432e-06, - "loss": 0.6161, + "epoch": 0.29, + "grad_norm": 1.5245329803549432, + "learning_rate": 8.335449424447106e-06, + "loss": 0.4889, "step": 4080 }, { - "epoch": 0.43, - "grad_norm": 3.8668906189282164, - "learning_rate": 6.3687006353665285e-06, - "loss": 0.6, + "epoch": 0.29, + "grad_norm": 1.5967757302811463, + "learning_rate": 8.33459323312665e-06, + "loss": 0.6005, "step": 4081 }, { - "epoch": 0.43, - "grad_norm": 3.230532760665731, - "learning_rate": 6.367061413321942e-06, - "loss": 0.6102, + "epoch": 0.29, + "grad_norm": 1.5680846125588224, + "learning_rate": 8.33373686566168e-06, + "loss": 0.6203, "step": 4082 }, { - "epoch": 0.43, - "grad_norm": 2.68067571548342, - "learning_rate": 6.365422032456113e-06, - "loss": 0.64, + "epoch": 0.29, + "grad_norm": 1.8050437486633064, + "learning_rate": 8.332880322097432e-06, + "loss": 0.5773, "step": 4083 }, { - "epoch": 0.43, - "grad_norm": 2.574913062898836, - "learning_rate": 6.363782492959499e-06, - "loss": 0.6528, + "epoch": 0.29, + "grad_norm": 1.3672771543109208, + "learning_rate": 8.332023602479155e-06, + "loss": 0.5277, "step": 4084 }, { - "epoch": 0.43, - "grad_norm": 4.168916407705001, - "learning_rate": 6.362142795022578e-06, - "loss": 0.6213, + "epoch": 0.29, + "grad_norm": 0.9450297733274493, + "learning_rate": 8.331166706852099e-06, + "loss": 0.4704, "step": 4085 }, { - "epoch": 0.43, - "grad_norm": 2.7508588377575767, - "learning_rate": 6.360502938835844e-06, - "loss": 0.6605, + "epoch": 0.29, + "grad_norm": 8.472961704075287, + "learning_rate": 8.330309635261529e-06, + "loss": 0.5898, "step": 4086 }, { - "epoch": 0.43, - "grad_norm": 3.4135918342912577, - "learning_rate": 6.35886292458981e-06, - "loss": 0.6797, + "epoch": 0.29, + "grad_norm": 1.5245774890814165, + "learning_rate": 8.32945238775272e-06, + "loss": 0.5415, "step": 4087 }, { - "epoch": 0.43, - "grad_norm": 2.555329036173374, - "learning_rate": 6.35722275247501e-06, - "loss": 0.691, + "epoch": 0.29, + "grad_norm": 2.0345273225569636, + "learning_rate": 8.328594964370957e-06, + "loss": 0.5869, "step": 4088 }, { - "epoch": 0.43, - "grad_norm": 6.3576452705524025, - "learning_rate": 6.355582422681996e-06, - "loss": 0.6184, + "epoch": 0.29, + "grad_norm": 1.7491823282944452, + "learning_rate": 8.327737365161527e-06, + "loss": 0.5648, "step": 4089 }, { - "epoch": 0.43, - "grad_norm": 3.01931185233685, - "learning_rate": 6.353941935401333e-06, - "loss": 0.6206, + "epoch": 0.29, + "grad_norm": 0.8522423883394311, + "learning_rate": 8.326879590169732e-06, + "loss": 0.4674, "step": 4090 }, { - "epoch": 0.43, - "grad_norm": 2.5007928234352677, - "learning_rate": 6.352301290823611e-06, - "loss": 0.6169, + "epoch": 0.29, + "grad_norm": 1.763586874804926, + "learning_rate": 8.326021639440886e-06, + "loss": 0.5025, "step": 4091 }, { - "epoch": 0.43, - "grad_norm": 3.556883601705964, - "learning_rate": 6.350660489139433e-06, - "loss": 0.6329, + "epoch": 0.29, + "grad_norm": 2.0198517030866, + "learning_rate": 8.325163513020307e-06, + "loss": 0.5605, "step": 4092 }, { - "epoch": 0.43, - "grad_norm": 2.5252161752585427, - "learning_rate": 6.349019530539425e-06, - "loss": 0.6555, + "epoch": 0.29, + "grad_norm": 1.628033212624208, + "learning_rate": 8.324305210953323e-06, + "loss": 0.5832, "step": 4093 }, { - "epoch": 0.43, - "grad_norm": 2.5247540508658526, - "learning_rate": 6.347378415214226e-06, - "loss": 0.6158, + "epoch": 0.29, + "grad_norm": 1.6377785613614986, + "learning_rate": 8.323446733285274e-06, + "loss": 0.5093, "step": 4094 }, { - "epoch": 0.43, - "grad_norm": 2.182461495472191, - "learning_rate": 6.3457371433544975e-06, - "loss": 0.7378, + "epoch": 0.29, + "grad_norm": 1.525521517055867, + "learning_rate": 8.322588080061506e-06, + "loss": 0.5586, "step": 4095 }, { - "epoch": 0.43, - "grad_norm": 2.6257667849308226, - "learning_rate": 6.34409571515092e-06, - "loss": 0.6825, + "epoch": 0.29, + "grad_norm": 1.6797857632105606, + "learning_rate": 8.321729251327379e-06, + "loss": 0.5347, "step": 4096 }, { - "epoch": 0.43, - "grad_norm": 2.4163772839213733, - "learning_rate": 6.342454130794186e-06, - "loss": 0.5942, + "epoch": 0.29, + "grad_norm": 1.7589641907977436, + "learning_rate": 8.320870247128257e-06, + "loss": 0.5023, "step": 4097 }, { - "epoch": 0.43, - "grad_norm": 2.548472388600027, - "learning_rate": 6.340812390475012e-06, - "loss": 0.6612, + "epoch": 0.29, + "grad_norm": 1.6197745062379005, + "learning_rate": 8.320011067509515e-06, + "loss": 0.5107, "step": 4098 }, { - "epoch": 0.43, - "grad_norm": 2.6494112784435204, - "learning_rate": 6.33917049438413e-06, - "loss": 0.7117, + "epoch": 0.29, + "grad_norm": 2.0996970864576308, + "learning_rate": 8.31915171251654e-06, + "loss": 0.5066, "step": 4099 }, { - "epoch": 0.43, - "grad_norm": 4.975703696322628, - "learning_rate": 6.3375284427122915e-06, - "loss": 0.726, + "epoch": 0.29, + "grad_norm": 7.833243962681858, + "learning_rate": 8.318292182194723e-06, + "loss": 0.5562, "step": 4100 }, { - "epoch": 0.43, - "grad_norm": 2.124929550258763, - "learning_rate": 6.335886235650264e-06, - "loss": 0.599, + "epoch": 0.29, + "grad_norm": 1.6865095989599892, + "learning_rate": 8.317432476589471e-06, + "loss": 0.5656, "step": 4101 }, { - "epoch": 0.43, - "grad_norm": 2.5941633006024762, - "learning_rate": 6.334243873388838e-06, - "loss": 0.7624, + "epoch": 0.29, + "grad_norm": 1.7701389993132288, + "learning_rate": 8.316572595746193e-06, + "loss": 0.5896, "step": 4102 }, { - "epoch": 0.43, - "grad_norm": 7.256377886619029, - "learning_rate": 6.332601356118813e-06, - "loss": 0.5449, + "epoch": 0.29, + "grad_norm": 2.0803635314486497, + "learning_rate": 8.315712539710312e-06, + "loss": 0.5443, "step": 4103 }, { - "epoch": 0.43, - "grad_norm": 3.9556721482690977, - "learning_rate": 6.330958684031016e-06, - "loss": 0.6911, + "epoch": 0.29, + "grad_norm": 1.6380505916811134, + "learning_rate": 8.314852308527261e-06, + "loss": 0.5053, "step": 4104 }, { - "epoch": 0.43, - "grad_norm": 2.733540693074302, - "learning_rate": 6.329315857316285e-06, - "loss": 0.6602, + "epoch": 0.29, + "grad_norm": 1.6002680706945174, + "learning_rate": 8.31399190224248e-06, + "loss": 0.4818, "step": 4105 }, { - "epoch": 0.43, - "grad_norm": 2.101209748842294, - "learning_rate": 6.327672876165481e-06, - "loss": 0.6334, + "epoch": 0.29, + "grad_norm": 1.8824503573752636, + "learning_rate": 8.313131320901415e-06, + "loss": 0.5624, "step": 4106 }, { - "epoch": 0.43, - "grad_norm": 2.230682903167034, - "learning_rate": 6.326029740769481e-06, - "loss": 0.6723, + "epoch": 0.29, + "grad_norm": 3.3461044406946416, + "learning_rate": 8.312270564549527e-06, + "loss": 0.5217, "step": 4107 }, { - "epoch": 0.43, - "grad_norm": 3.8797719596760216, - "learning_rate": 6.324386451319179e-06, - "loss": 0.6211, + "epoch": 0.29, + "grad_norm": 1.770712909533739, + "learning_rate": 8.311409633232286e-06, + "loss": 0.51, "step": 4108 }, { - "epoch": 0.43, - "grad_norm": 3.1406370239153274, - "learning_rate": 6.322743008005488e-06, - "loss": 0.6444, + "epoch": 0.29, + "grad_norm": 1.5548306704945407, + "learning_rate": 8.310548526995166e-06, + "loss": 0.5922, "step": 4109 }, { - "epoch": 0.43, - "grad_norm": 4.712312589801154, - "learning_rate": 6.321099411019336e-06, - "loss": 0.6464, + "epoch": 0.29, + "grad_norm": 0.7984810414335358, + "learning_rate": 8.309687245883657e-06, + "loss": 0.468, "step": 4110 }, { - "epoch": 0.43, - "grad_norm": 2.233736620239088, - "learning_rate": 6.319455660551674e-06, - "loss": 0.6325, + "epoch": 0.29, + "grad_norm": 1.5025004032114275, + "learning_rate": 8.308825789943251e-06, + "loss": 0.5817, "step": 4111 }, { - "epoch": 0.43, - "grad_norm": 2.038358257077351, - "learning_rate": 6.317811756793467e-06, - "loss": 0.6742, + "epoch": 0.29, + "grad_norm": 1.6244755862428146, + "learning_rate": 8.307964159219455e-06, + "loss": 0.6257, "step": 4112 }, { - "epoch": 0.43, - "grad_norm": 2.143450702921909, - "learning_rate": 6.316167699935702e-06, - "loss": 0.6708, + "epoch": 0.29, + "grad_norm": 2.49584720937432, + "learning_rate": 8.307102353757784e-06, + "loss": 0.5124, "step": 4113 }, { - "epoch": 0.43, - "grad_norm": 3.8705182545419534, - "learning_rate": 6.314523490169375e-06, - "loss": 0.5526, + "epoch": 0.29, + "grad_norm": 2.086362009132171, + "learning_rate": 8.306240373603761e-06, + "loss": 0.5178, "step": 4114 }, { - "epoch": 0.43, - "grad_norm": 2.073511807722659, - "learning_rate": 6.312879127685512e-06, - "loss": 0.6384, + "epoch": 0.29, + "grad_norm": 1.8283779148523402, + "learning_rate": 8.305378218802918e-06, + "loss": 0.576, "step": 4115 }, { - "epoch": 0.43, - "grad_norm": 2.653135511720908, - "learning_rate": 6.311234612675143e-06, - "loss": 0.709, + "epoch": 0.29, + "grad_norm": 1.844264055519146, + "learning_rate": 8.304515889400798e-06, + "loss": 0.5154, "step": 4116 }, { - "epoch": 0.43, - "grad_norm": 2.0677937210863715, - "learning_rate": 6.309589945329332e-06, - "loss": 0.6508, + "epoch": 0.29, + "grad_norm": 1.5221188986874985, + "learning_rate": 8.30365338544295e-06, + "loss": 0.5758, "step": 4117 }, { - "epoch": 0.43, - "grad_norm": 3.338910592281395, - "learning_rate": 6.307945125839143e-06, - "loss": 0.678, + "epoch": 0.29, + "grad_norm": 1.9206088393134741, + "learning_rate": 8.302790706974937e-06, + "loss": 0.5791, "step": 4118 }, { - "epoch": 0.43, - "grad_norm": 2.715799303343155, - "learning_rate": 6.3063001543956715e-06, - "loss": 0.6564, + "epoch": 0.29, + "grad_norm": 1.5452784917062203, + "learning_rate": 8.301927854042326e-06, + "loss": 0.6155, "step": 4119 }, { - "epoch": 0.43, - "grad_norm": 2.4933350769017357, - "learning_rate": 6.304655031190024e-06, - "loss": 0.5819, + "epoch": 0.29, + "grad_norm": 1.5935381546851894, + "learning_rate": 8.3010648266907e-06, + "loss": 0.5611, "step": 4120 }, { - "epoch": 0.43, - "grad_norm": 2.044652665239333, - "learning_rate": 6.303009756413327e-06, - "loss": 0.6325, + "epoch": 0.29, + "grad_norm": 1.6801042740513492, + "learning_rate": 8.300201624965642e-06, + "loss": 0.4994, "step": 4121 }, { - "epoch": 0.43, - "grad_norm": 6.095867866117808, - "learning_rate": 6.3013643302567225e-06, - "loss": 0.6967, + "epoch": 0.29, + "grad_norm": 1.6942249255069135, + "learning_rate": 8.299338248912752e-06, + "loss": 0.5507, "step": 4122 }, { - "epoch": 0.43, - "grad_norm": 2.849872100475443, - "learning_rate": 6.299718752911371e-06, - "loss": 0.7464, + "epoch": 0.29, + "grad_norm": 1.6278980817763506, + "learning_rate": 8.298474698577637e-06, + "loss": 0.5614, "step": 4123 }, { - "epoch": 0.43, - "grad_norm": 1.9748453249208904, - "learning_rate": 6.298073024568454e-06, - "loss": 0.6542, + "epoch": 0.29, + "grad_norm": 1.7807889265099264, + "learning_rate": 8.29761097400591e-06, + "loss": 0.6457, "step": 4124 }, { - "epoch": 0.43, - "grad_norm": 2.2600324826958365, - "learning_rate": 6.296427145419164e-06, - "loss": 0.5836, + "epoch": 0.29, + "grad_norm": 1.4840676387978882, + "learning_rate": 8.296747075243199e-06, + "loss": 0.5644, "step": 4125 }, { - "epoch": 0.43, - "grad_norm": 2.802818402196517, - "learning_rate": 6.294781115654718e-06, - "loss": 0.6668, + "epoch": 0.29, + "grad_norm": 1.6047686403007575, + "learning_rate": 8.295883002335137e-06, + "loss": 0.5934, "step": 4126 }, { - "epoch": 0.43, - "grad_norm": 2.676232174418196, - "learning_rate": 6.293134935466342e-06, - "loss": 0.6596, + "epoch": 0.29, + "grad_norm": 1.6310454121845173, + "learning_rate": 8.295018755327365e-06, + "loss": 0.5928, "step": 4127 }, { - "epoch": 0.43, - "grad_norm": 2.228739627526291, - "learning_rate": 6.291488605045288e-06, - "loss": 0.5731, + "epoch": 0.29, + "grad_norm": 1.564294637774086, + "learning_rate": 8.294154334265539e-06, + "loss": 0.5698, "step": 4128 }, { - "epoch": 0.43, - "grad_norm": 2.6049506326721703, - "learning_rate": 6.289842124582822e-06, - "loss": 0.5658, + "epoch": 0.29, + "grad_norm": 2.0135685027527077, + "learning_rate": 8.293289739195318e-06, + "loss": 0.5104, "step": 4129 }, { - "epoch": 0.43, - "grad_norm": 2.6541990876497694, - "learning_rate": 6.2881954942702265e-06, - "loss": 0.6145, + "epoch": 0.29, + "grad_norm": 1.4670726892515684, + "learning_rate": 8.292424970162374e-06, + "loss": 0.5445, "step": 4130 }, { - "epoch": 0.43, - "grad_norm": 1.0595704649971625, - "learning_rate": 6.286548714298801e-06, - "loss": 0.6131, + "epoch": 0.29, + "grad_norm": 1.7207675500827189, + "learning_rate": 8.291560027212386e-06, + "loss": 0.5814, "step": 4131 }, { - "epoch": 0.43, - "grad_norm": 2.420647946200874, - "learning_rate": 6.284901784859866e-06, - "loss": 0.6737, + "epoch": 0.29, + "grad_norm": 1.9943184225422592, + "learning_rate": 8.290694910391046e-06, + "loss": 0.6088, "step": 4132 }, { - "epoch": 0.43, - "grad_norm": 3.041851672678566, - "learning_rate": 6.283254706144756e-06, - "loss": 0.6566, + "epoch": 0.29, + "grad_norm": 1.450420516516994, + "learning_rate": 8.28982961974405e-06, + "loss": 0.5045, "step": 4133 }, { - "epoch": 0.44, - "grad_norm": 2.712373901252542, - "learning_rate": 6.281607478344823e-06, - "loss": 0.6394, + "epoch": 0.29, + "grad_norm": 1.591027872393627, + "learning_rate": 8.288964155317104e-06, + "loss": 0.5445, "step": 4134 }, { - "epoch": 0.44, - "grad_norm": 2.4741338459449853, - "learning_rate": 6.279960101651439e-06, - "loss": 0.7035, + "epoch": 0.29, + "grad_norm": 1.6777776382350158, + "learning_rate": 8.288098517155928e-06, + "loss": 0.6044, "step": 4135 }, { - "epoch": 0.44, - "grad_norm": 1.9857951374629823, - "learning_rate": 6.278312576255988e-06, - "loss": 0.6226, + "epoch": 0.29, + "grad_norm": 1.5113751985555637, + "learning_rate": 8.287232705306248e-06, + "loss": 0.5857, "step": 4136 }, { - "epoch": 0.44, - "grad_norm": 3.3494664192954384, - "learning_rate": 6.276664902349881e-06, - "loss": 0.6609, + "epoch": 0.29, + "grad_norm": 1.6135071642686558, + "learning_rate": 8.286366719813795e-06, + "loss": 0.5402, "step": 4137 }, { - "epoch": 0.44, - "grad_norm": 2.2270633620793228, - "learning_rate": 6.275017080124533e-06, - "loss": 0.6487, + "epoch": 0.29, + "grad_norm": 5.181313733656596, + "learning_rate": 8.285500560724317e-06, + "loss": 0.5668, "step": 4138 }, { - "epoch": 0.44, - "grad_norm": 3.2352633292562363, - "learning_rate": 6.273369109771387e-06, - "loss": 0.7118, + "epoch": 0.29, + "grad_norm": 1.672200608491575, + "learning_rate": 8.284634228083569e-06, + "loss": 0.558, "step": 4139 }, { - "epoch": 0.44, - "grad_norm": 2.268087699809955, - "learning_rate": 6.271720991481897e-06, - "loss": 0.6689, + "epoch": 0.29, + "grad_norm": 2.0314066053038857, + "learning_rate": 8.28376772193731e-06, + "loss": 0.5642, "step": 4140 }, { - "epoch": 0.44, - "grad_norm": 5.099756078374374, - "learning_rate": 6.270072725447542e-06, - "loss": 0.7536, + "epoch": 0.29, + "grad_norm": 0.8139778185289855, + "learning_rate": 8.282901042331311e-06, + "loss": 0.4771, "step": 4141 }, { - "epoch": 0.44, - "grad_norm": 2.4160875739712644, - "learning_rate": 6.268424311859808e-06, - "loss": 0.6797, + "epoch": 0.29, + "grad_norm": 1.6225810118990223, + "learning_rate": 8.282034189311358e-06, + "loss": 0.5634, "step": 4142 }, { - "epoch": 0.44, - "grad_norm": 3.1608916128751066, - "learning_rate": 6.266775750910203e-06, - "loss": 0.6239, + "epoch": 0.29, + "grad_norm": 1.6581110255224374, + "learning_rate": 8.281167162923236e-06, + "loss": 0.6011, "step": 4143 }, { - "epoch": 0.44, - "grad_norm": 2.642655322409538, - "learning_rate": 6.265127042790253e-06, - "loss": 0.7106, + "epoch": 0.29, + "grad_norm": 1.5184795827731392, + "learning_rate": 8.280299963212749e-06, + "loss": 0.5183, "step": 4144 }, { - "epoch": 0.44, - "grad_norm": 8.48267619018257, - "learning_rate": 6.263478187691502e-06, - "loss": 0.6842, + "epoch": 0.29, + "grad_norm": 1.6513978768158977, + "learning_rate": 8.279432590225698e-06, + "loss": 0.4386, "step": 4145 }, { - "epoch": 0.44, - "grad_norm": 2.2073464735347583, - "learning_rate": 6.2618291858055065e-06, - "loss": 0.716, + "epoch": 0.29, + "grad_norm": 1.4860005664476885, + "learning_rate": 8.278565044007908e-06, + "loss": 0.5383, "step": 4146 }, { - "epoch": 0.44, - "grad_norm": 3.2192677534009446, - "learning_rate": 6.260180037323843e-06, - "loss": 0.6008, + "epoch": 0.29, + "grad_norm": 1.8013062184567505, + "learning_rate": 8.277697324605205e-06, + "loss": 0.5716, "step": 4147 }, { - "epoch": 0.44, - "grad_norm": 2.8880065557661028, - "learning_rate": 6.258530742438107e-06, - "loss": 0.6526, + "epoch": 0.29, + "grad_norm": 1.7150381784744386, + "learning_rate": 8.27682943206342e-06, + "loss": 0.6185, "step": 4148 }, { - "epoch": 0.44, - "grad_norm": 1.923108042812208, - "learning_rate": 6.256881301339907e-06, - "loss": 0.5658, + "epoch": 0.29, + "grad_norm": 1.7854723183609789, + "learning_rate": 8.275961366428403e-06, + "loss": 0.5437, "step": 4149 }, { - "epoch": 0.44, - "grad_norm": 2.3215056242914405, - "learning_rate": 6.255231714220871e-06, - "loss": 0.664, + "epoch": 0.29, + "grad_norm": 1.6113544747838433, + "learning_rate": 8.275093127746003e-06, + "loss": 0.5246, "step": 4150 }, { - "epoch": 0.44, - "grad_norm": 2.598137114476907, - "learning_rate": 6.253581981272641e-06, - "loss": 0.6033, + "epoch": 0.29, + "grad_norm": 1.7206988325228054, + "learning_rate": 8.274224716062089e-06, + "loss": 0.5857, "step": 4151 }, { - "epoch": 0.44, - "grad_norm": 3.292031250462775, - "learning_rate": 6.251932102686883e-06, - "loss": 0.6393, + "epoch": 0.29, + "grad_norm": 1.6105741410605208, + "learning_rate": 8.273356131422533e-06, + "loss": 0.5908, "step": 4152 }, { - "epoch": 0.44, - "grad_norm": 2.9711599193058196, - "learning_rate": 6.2502820786552695e-06, - "loss": 0.7229, + "epoch": 0.29, + "grad_norm": 1.809310917055256, + "learning_rate": 8.272487373873211e-06, + "loss": 0.6126, "step": 4153 }, { - "epoch": 0.44, - "grad_norm": 3.4776790489556118, - "learning_rate": 6.2486319093695006e-06, - "loss": 0.6444, + "epoch": 0.29, + "grad_norm": 1.4058128394862073, + "learning_rate": 8.271618443460018e-06, + "loss": 0.5659, "step": 4154 }, { - "epoch": 0.44, - "grad_norm": 2.574952252391362, - "learning_rate": 6.246981595021284e-06, - "loss": 0.6376, + "epoch": 0.29, + "grad_norm": 2.0256917228512523, + "learning_rate": 8.270749340228855e-06, + "loss": 0.6181, "step": 4155 }, { - "epoch": 0.44, - "grad_norm": 2.485791306054375, - "learning_rate": 6.245331135802351e-06, - "loss": 0.6843, + "epoch": 0.29, + "grad_norm": 1.5544592446142869, + "learning_rate": 8.269880064225627e-06, + "loss": 0.6261, "step": 4156 }, { - "epoch": 0.44, - "grad_norm": 2.581681311574103, - "learning_rate": 6.243680531904448e-06, - "loss": 0.6037, + "epoch": 0.29, + "grad_norm": 1.4232600505157573, + "learning_rate": 8.269010615496253e-06, + "loss": 0.5521, "step": 4157 }, { - "epoch": 0.44, - "grad_norm": 3.6015305354626954, - "learning_rate": 6.242029783519334e-06, - "loss": 0.6377, + "epoch": 0.3, + "grad_norm": 1.8211907323115697, + "learning_rate": 8.268140994086663e-06, + "loss": 0.623, "step": 4158 }, { - "epoch": 0.44, - "grad_norm": 2.514525749517879, - "learning_rate": 6.240378890838792e-06, - "loss": 0.688, + "epoch": 0.3, + "grad_norm": 1.8713301798248745, + "learning_rate": 8.267271200042792e-06, + "loss": 0.5932, "step": 4159 }, { - "epoch": 0.44, - "grad_norm": 2.1131329820828673, - "learning_rate": 6.238727854054614e-06, - "loss": 0.6627, + "epoch": 0.3, + "grad_norm": 1.6874683261639822, + "learning_rate": 8.266401233410585e-06, + "loss": 0.5628, "step": 4160 }, { - "epoch": 0.44, - "grad_norm": 2.3327635807486757, - "learning_rate": 6.237076673358616e-06, - "loss": 0.6625, + "epoch": 0.3, + "grad_norm": 1.5580136383448697, + "learning_rate": 8.265531094235997e-06, + "loss": 0.5636, "step": 4161 }, { - "epoch": 0.44, - "grad_norm": 2.564116461115227, - "learning_rate": 6.235425348942625e-06, - "loss": 0.6025, + "epoch": 0.3, + "grad_norm": 1.7939152210109308, + "learning_rate": 8.26466078256499e-06, + "loss": 0.5812, "step": 4162 }, { - "epoch": 0.44, - "grad_norm": 25.51131291171202, - "learning_rate": 6.2337738809984905e-06, - "loss": 0.6809, + "epoch": 0.3, + "grad_norm": 0.8805465098353324, + "learning_rate": 8.26379029844354e-06, + "loss": 0.4694, "step": 4163 }, { - "epoch": 0.44, - "grad_norm": 2.404343930899044, - "learning_rate": 6.23212226971807e-06, - "loss": 0.6425, + "epoch": 0.3, + "grad_norm": 1.6306992913118648, + "learning_rate": 8.262919641917626e-06, + "loss": 0.578, "step": 4164 }, { - "epoch": 0.44, - "grad_norm": 2.986695766080571, - "learning_rate": 6.230470515293248e-06, - "loss": 0.674, + "epoch": 0.3, + "grad_norm": 1.618054579545835, + "learning_rate": 8.26204881303324e-06, + "loss": 0.5885, "step": 4165 }, { - "epoch": 0.44, - "grad_norm": 2.5976631521397846, - "learning_rate": 6.2288186179159175e-06, - "loss": 0.6709, - "step": 4166 + "epoch": 0.3, + "grad_norm": 2.957971751574563, + "learning_rate": 8.261177811836386e-06, + "loss": 0.5854, + "step": 4166 }, { - "epoch": 0.44, - "grad_norm": 12.729513809408251, - "learning_rate": 6.227166577777992e-06, - "loss": 0.6686, + "epoch": 0.3, + "grad_norm": 1.64962488471696, + "learning_rate": 8.260306638373065e-06, + "loss": 0.5483, "step": 4167 }, { - "epoch": 0.44, - "grad_norm": 2.51466132483747, - "learning_rate": 6.225514395071401e-06, - "loss": 0.7087, + "epoch": 0.3, + "grad_norm": 1.681156296000756, + "learning_rate": 8.259435292689302e-06, + "loss": 0.5266, "step": 4168 }, { - "epoch": 0.44, - "grad_norm": 2.7437128880343717, - "learning_rate": 6.223862069988091e-06, - "loss": 0.6417, + "epoch": 0.3, + "grad_norm": 2.2238159053441593, + "learning_rate": 8.258563774831122e-06, + "loss": 0.6135, "step": 4169 }, { - "epoch": 0.44, - "grad_norm": 3.7709270732185365, - "learning_rate": 6.222209602720023e-06, - "loss": 0.6492, + "epoch": 0.3, + "grad_norm": 1.7831033463205608, + "learning_rate": 8.257692084844562e-06, + "loss": 0.5523, "step": 4170 }, { - "epoch": 0.44, - "grad_norm": 2.6653325226917635, - "learning_rate": 6.220556993459174e-06, - "loss": 0.6882, + "epoch": 0.3, + "grad_norm": 1.9485285350069337, + "learning_rate": 8.256820222775667e-06, + "loss": 0.5598, "step": 4171 }, { - "epoch": 0.44, - "grad_norm": 2.290790865423674, - "learning_rate": 6.218904242397546e-06, - "loss": 0.6277, + "epoch": 0.3, + "grad_norm": 1.659468480362964, + "learning_rate": 8.255948188670493e-06, + "loss": 0.6267, "step": 4172 }, { - "epoch": 0.44, - "grad_norm": 3.0699405954785104, - "learning_rate": 6.217251349727145e-06, - "loss": 0.7121, + "epoch": 0.3, + "grad_norm": 1.6062076541930679, + "learning_rate": 8.255075982575102e-06, + "loss": 0.5949, "step": 4173 }, { - "epoch": 0.44, - "grad_norm": 2.734217356286444, - "learning_rate": 6.215598315640001e-06, - "loss": 0.7427, + "epoch": 0.3, + "grad_norm": 1.7605192474485087, + "learning_rate": 8.25420360453557e-06, + "loss": 0.5539, "step": 4174 }, { - "epoch": 0.44, - "grad_norm": 2.713834982597247, - "learning_rate": 6.213945140328157e-06, - "loss": 0.6955, + "epoch": 0.3, + "grad_norm": 1.5306436737346203, + "learning_rate": 8.253331054597974e-06, + "loss": 0.5991, "step": 4175 }, { - "epoch": 0.44, - "grad_norm": 5.311432316870064, - "learning_rate": 6.212291823983678e-06, - "loss": 0.6851, + "epoch": 0.3, + "grad_norm": 1.4960698789876439, + "learning_rate": 8.252458332808411e-06, + "loss": 0.5941, "step": 4176 }, { - "epoch": 0.44, - "grad_norm": 2.399335810386324, - "learning_rate": 6.2106383667986385e-06, - "loss": 0.6589, + "epoch": 0.3, + "grad_norm": 1.5532973188477233, + "learning_rate": 8.251585439212975e-06, + "loss": 0.5318, "step": 4177 }, { - "epoch": 0.44, - "grad_norm": 2.1179610916249185, - "learning_rate": 6.208984768965133e-06, - "loss": 0.585, + "epoch": 0.3, + "grad_norm": 0.7384839412581263, + "learning_rate": 8.25071237385778e-06, + "loss": 0.4501, "step": 4178 }, { - "epoch": 0.44, - "grad_norm": 3.492230402979365, - "learning_rate": 6.207331030675272e-06, - "loss": 0.6906, + "epoch": 0.3, + "grad_norm": 1.6589988343129711, + "learning_rate": 8.249839136788942e-06, + "loss": 0.5413, "step": 4179 }, { - "epoch": 0.44, - "grad_norm": 10.05084961096072, - "learning_rate": 6.2056771521211815e-06, - "loss": 0.6719, + "epoch": 0.3, + "grad_norm": 2.5953690359593615, + "learning_rate": 8.24896572805259e-06, + "loss": 0.5143, "step": 4180 }, { - "epoch": 0.44, - "grad_norm": 2.525761336265004, - "learning_rate": 6.204023133495005e-06, - "loss": 0.6352, + "epoch": 0.3, + "grad_norm": 1.7398918107833146, + "learning_rate": 8.248092147694858e-06, + "loss": 0.5924, "step": 4181 }, { - "epoch": 0.44, - "grad_norm": 2.2251077581550414, - "learning_rate": 6.2023689749889e-06, - "loss": 0.6294, + "epoch": 0.3, + "grad_norm": 1.7930315917438435, + "learning_rate": 8.247218395761891e-06, + "loss": 0.4919, "step": 4182 }, { - "epoch": 0.44, - "grad_norm": 2.3917017204363615, - "learning_rate": 6.2007146767950455e-06, - "loss": 0.6562, + "epoch": 0.3, + "grad_norm": 1.5766698561857364, + "learning_rate": 8.246344472299847e-06, + "loss": 0.6004, "step": 4183 }, { - "epoch": 0.44, - "grad_norm": 5.7924703663410675, - "learning_rate": 6.199060239105628e-06, - "loss": 0.6758, + "epoch": 0.3, + "grad_norm": 1.6331213742360968, + "learning_rate": 8.24547037735489e-06, + "loss": 0.5546, "step": 4184 }, { - "epoch": 0.44, - "grad_norm": 2.3713543077213854, - "learning_rate": 6.197405662112862e-06, - "loss": 0.6653, + "epoch": 0.3, + "grad_norm": 1.6150738681792824, + "learning_rate": 8.24459611097319e-06, + "loss": 0.5925, "step": 4185 }, { - "epoch": 0.44, - "grad_norm": 2.439148199597049, - "learning_rate": 6.195750946008965e-06, - "loss": 0.6896, + "epoch": 0.3, + "grad_norm": 1.6684522297024726, + "learning_rate": 8.243721673200927e-06, + "loss": 0.6121, "step": 4186 }, { - "epoch": 0.44, - "grad_norm": 3.7616942745004667, - "learning_rate": 6.19409609098618e-06, - "loss": 0.6445, + "epoch": 0.3, + "grad_norm": 1.8655567828720996, + "learning_rate": 8.242847064084294e-06, + "loss": 0.5874, "step": 4187 }, { - "epoch": 0.44, - "grad_norm": 2.610494475761507, - "learning_rate": 6.192441097236762e-06, - "loss": 0.7334, + "epoch": 0.3, + "grad_norm": 1.6653990070026172, + "learning_rate": 8.241972283669491e-06, + "loss": 0.4547, "step": 4188 }, { - "epoch": 0.44, - "grad_norm": 2.098962128255649, - "learning_rate": 6.190785964952985e-06, - "loss": 0.634, + "epoch": 0.3, + "grad_norm": 1.8035491822098664, + "learning_rate": 8.241097332002727e-06, + "loss": 0.5524, "step": 4189 }, { - "epoch": 0.44, - "grad_norm": 1.11012265793801, - "learning_rate": 6.189130694327138e-06, - "loss": 0.6304, + "epoch": 0.3, + "grad_norm": 1.7536452237578264, + "learning_rate": 8.24022220913022e-06, + "loss": 0.5854, "step": 4190 }, { - "epoch": 0.44, - "grad_norm": 2.6761648104534377, - "learning_rate": 6.187475285551523e-06, - "loss": 0.6746, + "epoch": 0.3, + "grad_norm": 1.4971351313299557, + "learning_rate": 8.239346915098196e-06, + "loss": 0.5642, "step": 4191 }, { - "epoch": 0.44, - "grad_norm": 2.6521807011331933, - "learning_rate": 6.185819738818463e-06, - "loss": 0.6235, + "epoch": 0.3, + "grad_norm": 1.695306458457725, + "learning_rate": 8.238471449952892e-06, + "loss": 0.5534, "step": 4192 }, { - "epoch": 0.44, - "grad_norm": 3.263291536626014, - "learning_rate": 6.184164054320293e-06, - "loss": 0.6194, + "epoch": 0.3, + "grad_norm": 1.8606063001152102, + "learning_rate": 8.237595813740553e-06, + "loss": 0.5855, "step": 4193 }, { - "epoch": 0.44, - "grad_norm": 1.0093262515523898, - "learning_rate": 6.1825082322493655e-06, - "loss": 0.6021, + "epoch": 0.3, + "grad_norm": 1.6160097951867147, + "learning_rate": 8.236720006507432e-06, + "loss": 0.6048, "step": 4194 }, { - "epoch": 0.44, - "grad_norm": 6.4984369533755295, - "learning_rate": 6.18085227279805e-06, - "loss": 0.6466, + "epoch": 0.3, + "grad_norm": 1.5655950978399322, + "learning_rate": 8.235844028299793e-06, + "loss": 0.5577, "step": 4195 }, { - "epoch": 0.44, - "grad_norm": 2.873242485567296, - "learning_rate": 6.179196176158733e-06, - "loss": 0.6112, + "epoch": 0.3, + "grad_norm": 1.9865616726535127, + "learning_rate": 8.234967879163907e-06, + "loss": 0.5663, "step": 4196 }, { - "epoch": 0.44, - "grad_norm": 2.2153603654893232, - "learning_rate": 6.17753994252381e-06, - "loss": 0.677, + "epoch": 0.3, + "grad_norm": 1.5395279151810901, + "learning_rate": 8.234091559146057e-06, + "loss": 0.5886, "step": 4197 }, { - "epoch": 0.44, - "grad_norm": 4.885706072220139, - "learning_rate": 6.175883572085703e-06, - "loss": 0.6734, + "epoch": 0.3, + "grad_norm": 2.4834580394407877, + "learning_rate": 8.233215068292533e-06, + "loss": 0.5986, "step": 4198 }, { - "epoch": 0.44, - "grad_norm": 2.306773750236914, - "learning_rate": 6.1742270650368395e-06, - "loss": 0.6614, + "epoch": 0.3, + "grad_norm": 1.6650319016640125, + "learning_rate": 8.232338406649632e-06, + "loss": 0.5946, "step": 4199 }, { - "epoch": 0.44, - "grad_norm": 3.0757107827698014, - "learning_rate": 6.172570421569672e-06, - "loss": 0.6607, + "epoch": 0.3, + "grad_norm": 1.7706518959997848, + "learning_rate": 8.231461574263663e-06, + "loss": 0.5231, "step": 4200 }, { - "epoch": 0.44, - "grad_norm": 2.6497916669129955, - "learning_rate": 6.170913641876662e-06, - "loss": 0.6356, + "epoch": 0.3, + "grad_norm": 1.681933722108426, + "learning_rate": 8.230584571180947e-06, + "loss": 0.6473, "step": 4201 }, { - "epoch": 0.44, - "grad_norm": 2.4729495304399536, - "learning_rate": 6.1692567261502885e-06, - "loss": 0.5511, + "epoch": 0.3, + "grad_norm": 1.7714317340805206, + "learning_rate": 8.229707397447806e-06, + "loss": 0.5525, "step": 4202 }, { - "epoch": 0.44, - "grad_norm": 2.4085363284193635, - "learning_rate": 6.167599674583049e-06, - "loss": 0.6771, + "epoch": 0.3, + "grad_norm": 1.6491001523680766, + "learning_rate": 8.228830053110575e-06, + "loss": 0.5644, "step": 4203 }, { - "epoch": 0.44, - "grad_norm": 2.5904214468463556, - "learning_rate": 6.165942487367456e-06, - "loss": 0.6186, + "epoch": 0.3, + "grad_norm": 1.6971035318867842, + "learning_rate": 8.2279525382156e-06, + "loss": 0.6269, "step": 4204 }, { - "epoch": 0.44, - "grad_norm": 11.74561826819688, - "learning_rate": 6.164285164696034e-06, - "loss": 0.698, + "epoch": 0.3, + "grad_norm": 2.010449230676553, + "learning_rate": 8.227074852809236e-06, + "loss": 0.5874, "step": 4205 }, { - "epoch": 0.44, - "grad_norm": 2.4895776594825447, - "learning_rate": 6.162627706761326e-06, - "loss": 0.6161, + "epoch": 0.3, + "grad_norm": 1.4831482196527084, + "learning_rate": 8.226196996937843e-06, + "loss": 0.5829, "step": 4206 }, { - "epoch": 0.44, - "grad_norm": 3.9883010766926974, - "learning_rate": 6.160970113755894e-06, - "loss": 0.6723, + "epoch": 0.3, + "grad_norm": 1.4061421100911646, + "learning_rate": 8.22531897064779e-06, + "loss": 0.5415, "step": 4207 }, { - "epoch": 0.44, - "grad_norm": 3.412743390251808, - "learning_rate": 6.159312385872309e-06, - "loss": 0.6407, + "epoch": 0.3, + "grad_norm": 1.7465015461277564, + "learning_rate": 8.224440773985464e-06, + "loss": 0.5093, "step": 4208 }, { - "epoch": 0.44, - "grad_norm": 1.988545368414137, - "learning_rate": 6.157654523303164e-06, - "loss": 0.6851, + "epoch": 0.3, + "grad_norm": 1.4933868008570563, + "learning_rate": 8.223562406997247e-06, + "loss": 0.5438, "step": 4209 }, { - "epoch": 0.44, - "grad_norm": 2.4530378165050104, - "learning_rate": 6.15599652624106e-06, - "loss": 0.6844, + "epoch": 0.3, + "grad_norm": 1.5726636849504292, + "learning_rate": 8.222683869729544e-06, + "loss": 0.5664, "step": 4210 }, { - "epoch": 0.44, - "grad_norm": 4.747802711725356, - "learning_rate": 6.154338394878624e-06, - "loss": 0.6251, + "epoch": 0.3, + "grad_norm": 1.561597600549059, + "learning_rate": 8.221805162228758e-06, + "loss": 0.585, "step": 4211 }, { - "epoch": 0.44, - "grad_norm": 3.6279563635896923, - "learning_rate": 6.152680129408488e-06, - "loss": 0.6315, + "epoch": 0.3, + "grad_norm": 7.0476427591235185, + "learning_rate": 8.220926284541305e-06, + "loss": 0.5294, "step": 4212 }, { - "epoch": 0.44, - "grad_norm": 2.6101031201569262, - "learning_rate": 6.151021730023308e-06, - "loss": 0.6899, + "epoch": 0.3, + "grad_norm": 0.9760640543387811, + "learning_rate": 8.220047236713612e-06, + "loss": 0.4836, "step": 4213 }, { - "epoch": 0.44, - "grad_norm": 2.8097748740269637, - "learning_rate": 6.14936319691575e-06, - "loss": 0.6828, + "epoch": 0.3, + "grad_norm": 1.7365932216629094, + "learning_rate": 8.219168018792114e-06, + "loss": 0.6634, "step": 4214 }, { - "epoch": 0.44, - "grad_norm": 2.5163859758968785, - "learning_rate": 6.147704530278497e-06, - "loss": 0.7089, + "epoch": 0.3, + "grad_norm": 1.9581582190775046, + "learning_rate": 8.218288630823252e-06, + "loss": 0.5692, "step": 4215 }, { - "epoch": 0.44, - "grad_norm": 2.8122326932337884, - "learning_rate": 6.146045730304252e-06, - "loss": 0.7728, + "epoch": 0.3, + "grad_norm": 1.789445125597473, + "learning_rate": 8.21740907285348e-06, + "loss": 0.5079, "step": 4216 }, { - "epoch": 0.44, - "grad_norm": 2.2991108660602873, - "learning_rate": 6.144386797185724e-06, - "loss": 0.6771, + "epoch": 0.3, + "grad_norm": 19.96682884616191, + "learning_rate": 8.216529344929261e-06, + "loss": 0.5791, "step": 4217 }, { - "epoch": 0.44, - "grad_norm": 2.82067248565608, - "learning_rate": 6.14272773111565e-06, - "loss": 0.6825, + "epoch": 0.3, + "grad_norm": 0.8165971441561088, + "learning_rate": 8.21564944709706e-06, + "loss": 0.4451, "step": 4218 }, { - "epoch": 0.44, - "grad_norm": 4.292155901876321, - "learning_rate": 6.141068532286768e-06, - "loss": 0.7232, + "epoch": 0.3, + "grad_norm": 1.6431279924625684, + "learning_rate": 8.214769379403358e-06, + "loss": 0.6149, "step": 4219 }, { - "epoch": 0.44, - "grad_norm": 3.2138852203020436, - "learning_rate": 6.139409200891845e-06, - "loss": 0.6331, + "epoch": 0.3, + "grad_norm": 1.6587493891304463, + "learning_rate": 8.213889141894647e-06, + "loss": 0.4625, "step": 4220 }, { - "epoch": 0.44, - "grad_norm": 2.733361759557919, - "learning_rate": 6.137749737123652e-06, - "loss": 0.5726, + "epoch": 0.3, + "grad_norm": 1.7246095943061173, + "learning_rate": 8.21300873461742e-06, + "loss": 0.5338, "step": 4221 }, { - "epoch": 0.44, - "grad_norm": 2.045897121812701, - "learning_rate": 6.136090141174986e-06, - "loss": 0.6303, + "epoch": 0.3, + "grad_norm": 1.5979234154850817, + "learning_rate": 8.212128157618185e-06, + "loss": 0.5434, "step": 4222 }, { - "epoch": 0.44, - "grad_norm": 6.215595577195097, - "learning_rate": 6.134430413238649e-06, - "loss": 0.7165, + "epoch": 0.3, + "grad_norm": 1.5481385319371779, + "learning_rate": 8.211247410943458e-06, + "loss": 0.6017, "step": 4223 }, { - "epoch": 0.44, - "grad_norm": 2.4720795018859287, - "learning_rate": 6.132770553507468e-06, - "loss": 0.5848, + "epoch": 0.3, + "grad_norm": 1.6912640041680673, + "learning_rate": 8.210366494639759e-06, + "loss": 0.6101, "step": 4224 }, { - "epoch": 0.44, - "grad_norm": 2.469345000674683, - "learning_rate": 6.1311105621742775e-06, - "loss": 0.6669, + "epoch": 0.3, + "grad_norm": 1.5993382510421668, + "learning_rate": 8.209485408753625e-06, + "loss": 0.5677, "step": 4225 }, { - "epoch": 0.44, - "grad_norm": 2.625205929898641, - "learning_rate": 6.129450439431932e-06, - "loss": 0.6472, + "epoch": 0.3, + "grad_norm": 0.8327717151420451, + "learning_rate": 8.208604153331597e-06, + "loss": 0.46, "step": 4226 }, { - "epoch": 0.44, - "grad_norm": 3.5548855414175695, - "learning_rate": 6.1277901854732994e-06, - "loss": 0.6817, + "epoch": 0.3, + "grad_norm": 1.7811241456449773, + "learning_rate": 8.207722728420225e-06, + "loss": 0.5282, "step": 4227 }, { - "epoch": 0.44, - "grad_norm": 2.1027568650104005, - "learning_rate": 6.126129800491263e-06, - "loss": 0.6514, + "epoch": 0.3, + "grad_norm": 1.6646032140518985, + "learning_rate": 8.206841134066067e-06, + "loss": 0.5784, "step": 4228 }, { - "epoch": 0.45, - "grad_norm": 2.045046875891458, - "learning_rate": 6.124469284678721e-06, - "loss": 0.5825, + "epoch": 0.3, + "grad_norm": 0.7400769419798904, + "learning_rate": 8.205959370315696e-06, + "loss": 0.4662, "step": 4229 }, { - "epoch": 0.45, - "grad_norm": 1.1138142631995698, - "learning_rate": 6.122808638228588e-06, - "loss": 0.6002, + "epoch": 0.3, + "grad_norm": 2.3498507275451423, + "learning_rate": 8.205077437215687e-06, + "loss": 0.4991, "step": 4230 }, { - "epoch": 0.45, - "grad_norm": 3.3093105459556447, - "learning_rate": 6.121147861333795e-06, - "loss": 0.7347, + "epoch": 0.3, + "grad_norm": 1.545520681577717, + "learning_rate": 8.204195334812629e-06, + "loss": 0.5724, "step": 4231 }, { - "epoch": 0.45, - "grad_norm": 2.637992867101944, - "learning_rate": 6.119486954187283e-06, - "loss": 0.6293, + "epoch": 0.3, + "grad_norm": 1.740156727485627, + "learning_rate": 8.203313063153115e-06, + "loss": 0.5946, "step": 4232 }, { - "epoch": 0.45, - "grad_norm": 4.2443163383582645, - "learning_rate": 6.117825916982013e-06, - "loss": 0.6475, + "epoch": 0.3, + "grad_norm": 2.033725873031003, + "learning_rate": 8.202430622283751e-06, + "loss": 0.5333, "step": 4233 }, { - "epoch": 0.45, - "grad_norm": 2.3645005998517554, - "learning_rate": 6.116164749910959e-06, - "loss": 0.7004, + "epoch": 0.3, + "grad_norm": 1.4254601630298347, + "learning_rate": 8.20154801225115e-06, + "loss": 0.5258, "step": 4234 }, { - "epoch": 0.45, - "grad_norm": 12.488986762043982, - "learning_rate": 6.114503453167112e-06, - "loss": 0.6375, + "epoch": 0.3, + "grad_norm": 1.8425489787451375, + "learning_rate": 8.200665233101934e-06, + "loss": 0.5721, "step": 4235 }, { - "epoch": 0.45, - "grad_norm": 2.456714677650742, - "learning_rate": 6.112842026943473e-06, - "loss": 0.7201, + "epoch": 0.3, + "grad_norm": 1.6052046750993954, + "learning_rate": 8.199782284882737e-06, + "loss": 0.5553, "step": 4236 }, { - "epoch": 0.45, - "grad_norm": 2.792896979064036, - "learning_rate": 6.111180471433067e-06, - "loss": 0.7046, + "epoch": 0.3, + "grad_norm": 0.7653488722005614, + "learning_rate": 8.198899167640195e-06, + "loss": 0.4499, "step": 4237 }, { - "epoch": 0.45, - "grad_norm": 2.311085039466233, - "learning_rate": 6.109518786828924e-06, - "loss": 0.6333, + "epoch": 0.3, + "grad_norm": 1.4551314111530251, + "learning_rate": 8.198015881420961e-06, + "loss": 0.5398, "step": 4238 }, { - "epoch": 0.45, - "grad_norm": 4.082195260565265, - "learning_rate": 6.107856973324097e-06, - "loss": 0.7398, + "epoch": 0.3, + "grad_norm": 1.8582049839617305, + "learning_rate": 8.197132426271691e-06, + "loss": 0.6317, "step": 4239 }, { - "epoch": 0.45, - "grad_norm": 3.422658221464742, - "learning_rate": 6.106195031111648e-06, - "loss": 0.626, + "epoch": 0.3, + "grad_norm": 0.8252698423707818, + "learning_rate": 8.196248802239056e-06, + "loss": 0.4362, "step": 4240 }, { - "epoch": 0.45, - "grad_norm": 4.357834278269825, - "learning_rate": 6.104532960384658e-06, - "loss": 0.6259, + "epoch": 0.3, + "grad_norm": 1.5950030675106568, + "learning_rate": 8.195365009369725e-06, + "loss": 0.5302, "step": 4241 }, { - "epoch": 0.45, - "grad_norm": 2.0886646213733924, - "learning_rate": 6.1028707613362236e-06, - "loss": 0.7135, + "epoch": 0.3, + "grad_norm": 2.3532132951183833, + "learning_rate": 8.194481047710388e-06, + "loss": 0.5851, "step": 4242 }, { - "epoch": 0.45, - "grad_norm": 2.8932851903398698, - "learning_rate": 6.101208434159451e-06, - "loss": 0.6996, + "epoch": 0.3, + "grad_norm": 1.5756281479449123, + "learning_rate": 8.193596917307738e-06, + "loss": 0.6134, "step": 4243 }, { - "epoch": 0.45, - "grad_norm": 2.3623621800314547, - "learning_rate": 6.099545979047465e-06, - "loss": 0.6463, + "epoch": 0.3, + "grad_norm": 1.7171837602839939, + "learning_rate": 8.192712618208475e-06, + "loss": 0.552, "step": 4244 }, { - "epoch": 0.45, - "grad_norm": 2.3794475175322924, - "learning_rate": 6.097883396193406e-06, - "loss": 0.6134, + "epoch": 0.3, + "grad_norm": 2.7593316860099097, + "learning_rate": 8.191828150459316e-06, + "loss": 0.6062, "step": 4245 }, { - "epoch": 0.45, - "grad_norm": 2.2159715788742234, - "learning_rate": 6.09622068579043e-06, - "loss": 0.6379, + "epoch": 0.3, + "grad_norm": 0.7696611211097445, + "learning_rate": 8.19094351410698e-06, + "loss": 0.4915, "step": 4246 }, { - "epoch": 0.45, - "grad_norm": 3.65089220209315, - "learning_rate": 6.094557848031699e-06, - "loss": 0.6913, + "epoch": 0.3, + "grad_norm": 1.8138343372343733, + "learning_rate": 8.190058709198193e-06, + "loss": 0.5292, "step": 4247 }, { - "epoch": 0.45, - "grad_norm": 2.238068508379566, - "learning_rate": 6.092894883110405e-06, - "loss": 0.6267, + "epoch": 0.3, + "grad_norm": 1.607835255615697, + "learning_rate": 8.189173735779695e-06, + "loss": 0.5725, "step": 4248 }, { - "epoch": 0.45, - "grad_norm": 2.62840874216548, - "learning_rate": 6.0912317912197416e-06, - "loss": 0.6864, + "epoch": 0.3, + "grad_norm": 1.6014220462689603, + "learning_rate": 8.188288593898235e-06, + "loss": 0.4691, "step": 4249 }, { - "epoch": 0.45, - "grad_norm": 2.7152502107110603, - "learning_rate": 6.089568572552923e-06, - "loss": 0.6414, + "epoch": 0.3, + "grad_norm": 1.9292006668606876, + "learning_rate": 8.187403283600568e-06, + "loss": 0.5571, "step": 4250 }, { - "epoch": 0.45, - "grad_norm": 1.0634647134438302, - "learning_rate": 6.087905227303177e-06, - "loss": 0.5915, + "epoch": 0.3, + "grad_norm": 1.839405984841153, + "learning_rate": 8.18651780493346e-06, + "loss": 0.5172, "step": 4251 }, { - "epoch": 0.45, - "grad_norm": 2.6867407708114026, - "learning_rate": 6.086241755663746e-06, - "loss": 0.668, + "epoch": 0.3, + "grad_norm": 1.8676733100138836, + "learning_rate": 8.185632157943682e-06, + "loss": 0.5508, "step": 4252 }, { - "epoch": 0.45, - "grad_norm": 1.0245261285927338, - "learning_rate": 6.08457815782789e-06, - "loss": 0.5945, + "epoch": 0.3, + "grad_norm": 1.6414132682658147, + "learning_rate": 8.18474634267802e-06, + "loss": 0.5701, "step": 4253 }, { - "epoch": 0.45, - "grad_norm": 2.184617873138533, - "learning_rate": 6.082914433988875e-06, - "loss": 0.566, + "epoch": 0.3, + "grad_norm": 1.6107034650566234, + "learning_rate": 8.183860359183264e-06, + "loss": 0.5234, "step": 4254 }, { - "epoch": 0.45, - "grad_norm": 2.2773216580867945, - "learning_rate": 6.081250584339996e-06, - "loss": 0.5985, + "epoch": 0.3, + "grad_norm": 1.5519368281673547, + "learning_rate": 8.182974207506218e-06, + "loss": 0.5079, "step": 4255 }, { - "epoch": 0.45, - "grad_norm": 2.2623346319662763, - "learning_rate": 6.079586609074547e-06, - "loss": 0.6648, + "epoch": 0.3, + "grad_norm": 1.450720658454228, + "learning_rate": 8.182087887693687e-06, + "loss": 0.561, "step": 4256 }, { - "epoch": 0.45, - "grad_norm": 3.594546200291953, - "learning_rate": 6.077922508385849e-06, - "loss": 0.6895, + "epoch": 0.3, + "grad_norm": 1.5393574885034085, + "learning_rate": 8.181201399792492e-06, + "loss": 0.5222, "step": 4257 }, { - "epoch": 0.45, - "grad_norm": 2.2014493515781806, - "learning_rate": 6.076258282467227e-06, - "loss": 0.6097, + "epoch": 0.3, + "grad_norm": 1.5434876223080358, + "learning_rate": 8.180314743849459e-06, + "loss": 0.5537, "step": 4258 }, { - "epoch": 0.45, - "grad_norm": 2.6694207927575233, - "learning_rate": 6.074593931512031e-06, - "loss": 0.6801, + "epoch": 0.3, + "grad_norm": 1.6256919191143686, + "learning_rate": 8.179427919911425e-06, + "loss": 0.5386, "step": 4259 }, { - "epoch": 0.45, - "grad_norm": 3.451259021434374, - "learning_rate": 6.072929455713616e-06, - "loss": 0.6692, + "epoch": 0.3, + "grad_norm": 2.0980988172323096, + "learning_rate": 8.178540928025236e-06, + "loss": 0.5033, "step": 4260 }, { - "epoch": 0.45, - "grad_norm": 1.9337354819470554, - "learning_rate": 6.07126485526536e-06, - "loss": 0.5854, + "epoch": 0.3, + "grad_norm": 2.0611411734553857, + "learning_rate": 8.177653768237744e-06, + "loss": 0.5033, "step": 4261 }, { - "epoch": 0.45, - "grad_norm": 9.767008015068956, - "learning_rate": 6.0696001303606486e-06, - "loss": 0.6037, + "epoch": 0.3, + "grad_norm": 1.4218449186715503, + "learning_rate": 8.176766440595812e-06, + "loss": 0.4939, "step": 4262 }, { - "epoch": 0.45, - "grad_norm": 3.0086485367597264, - "learning_rate": 6.067935281192887e-06, - "loss": 0.6432, + "epoch": 0.3, + "grad_norm": 1.9467803163175568, + "learning_rate": 8.175878945146312e-06, + "loss": 0.5442, "step": 4263 }, { - "epoch": 0.45, - "grad_norm": 2.153528393185996, - "learning_rate": 6.066270307955492e-06, - "loss": 0.643, + "epoch": 0.3, + "grad_norm": 0.8404212571999479, + "learning_rate": 8.174991281936127e-06, + "loss": 0.4567, "step": 4264 }, { - "epoch": 0.45, - "grad_norm": 3.1645611909710882, - "learning_rate": 6.064605210841893e-06, - "loss": 0.6053, + "epoch": 0.3, + "grad_norm": 1.4710047748911266, + "learning_rate": 8.174103451012142e-06, + "loss": 0.5364, "step": 4265 }, { - "epoch": 0.45, - "grad_norm": 2.309866791740079, - "learning_rate": 6.062939990045541e-06, - "loss": 0.6321, + "epoch": 0.3, + "grad_norm": 2.519682572781901, + "learning_rate": 8.173215452421259e-06, + "loss": 0.6639, "step": 4266 }, { - "epoch": 0.45, - "grad_norm": 2.156210421450635, - "learning_rate": 6.06127464575989e-06, - "loss": 0.5757, + "epoch": 0.3, + "grad_norm": 0.8517253187394633, + "learning_rate": 8.172327286210382e-06, + "loss": 0.4938, "step": 4267 }, { - "epoch": 0.45, - "grad_norm": 3.244885997343262, - "learning_rate": 6.059609178178423e-06, - "loss": 0.6942, + "epoch": 0.3, + "grad_norm": 1.637167819783796, + "learning_rate": 8.17143895242643e-06, + "loss": 0.575, "step": 4268 }, { - "epoch": 0.45, - "grad_norm": 3.152053051101937, - "learning_rate": 6.0579435874946205e-06, - "loss": 0.6633, + "epoch": 0.3, + "grad_norm": 1.6311873938870587, + "learning_rate": 8.170550451116325e-06, + "loss": 0.5171, "step": 4269 }, { - "epoch": 0.45, - "grad_norm": 4.478964275396135, - "learning_rate": 6.056277873901993e-06, - "loss": 0.6697, + "epoch": 0.3, + "grad_norm": 5.593648480827877, + "learning_rate": 8.169661782327002e-06, + "loss": 0.5767, "step": 4270 }, { - "epoch": 0.45, - "grad_norm": 2.9729373074740866, - "learning_rate": 6.054612037594053e-06, - "loss": 0.7223, + "epoch": 0.3, + "grad_norm": 1.7775731214289612, + "learning_rate": 8.168772946105403e-06, + "loss": 0.5248, "step": 4271 }, { - "epoch": 0.45, - "grad_norm": 2.0783619133176625, - "learning_rate": 6.052946078764337e-06, - "loss": 0.6165, + "epoch": 0.3, + "grad_norm": 1.886576981047356, + "learning_rate": 8.167883942498482e-06, + "loss": 0.5555, "step": 4272 }, { - "epoch": 0.45, - "grad_norm": 1.2263809793723803, - "learning_rate": 6.0512799976063885e-06, - "loss": 0.586, + "epoch": 0.3, + "grad_norm": 2.008619813638456, + "learning_rate": 8.166994771553194e-06, + "loss": 0.6, "step": 4273 }, { - "epoch": 0.45, - "grad_norm": 2.84734542991169, - "learning_rate": 6.049613794313769e-06, - "loss": 0.651, + "epoch": 0.3, + "grad_norm": 1.618627618842228, + "learning_rate": 8.166105433316513e-06, + "loss": 0.5722, "step": 4274 }, { - "epoch": 0.45, - "grad_norm": 2.3793929939042915, - "learning_rate": 6.047947469080053e-06, - "loss": 0.7264, + "epoch": 0.3, + "grad_norm": 1.867981150947837, + "learning_rate": 8.165215927835413e-06, + "loss": 0.6059, "step": 4275 }, { - "epoch": 0.45, - "grad_norm": 3.7894502963984307, - "learning_rate": 6.0462810220988284e-06, - "loss": 0.5848, + "epoch": 0.3, + "grad_norm": 1.5202703025353805, + "learning_rate": 8.164326255156883e-06, + "loss": 0.5189, "step": 4276 }, { - "epoch": 0.45, - "grad_norm": 2.385912284914175, - "learning_rate": 6.044614453563702e-06, - "loss": 0.6738, + "epoch": 0.3, + "grad_norm": 1.8681792848090986, + "learning_rate": 8.163436415327919e-06, + "loss": 0.549, "step": 4277 }, { - "epoch": 0.45, - "grad_norm": 3.0450272266233553, - "learning_rate": 6.042947763668285e-06, - "loss": 0.746, + "epoch": 0.3, + "grad_norm": 1.996835077982992, + "learning_rate": 8.162546408395524e-06, + "loss": 0.5401, "step": 4278 }, { - "epoch": 0.45, - "grad_norm": 2.0469407502606782, - "learning_rate": 6.041280952606214e-06, - "loss": 0.5847, + "epoch": 0.3, + "grad_norm": 2.2644138093798114, + "learning_rate": 8.16165623440671e-06, + "loss": 0.5474, "step": 4279 }, { - "epoch": 0.45, - "grad_norm": 2.531872581619871, - "learning_rate": 6.03961402057113e-06, - "loss": 0.5961, + "epoch": 0.3, + "grad_norm": 1.8336811889449225, + "learning_rate": 8.160765893408502e-06, + "loss": 0.5762, "step": 4280 }, { - "epoch": 0.45, - "grad_norm": 0.9848807465163134, - "learning_rate": 6.037946967756696e-06, - "loss": 0.6215, + "epoch": 0.3, + "grad_norm": 1.5400338940958636, + "learning_rate": 8.15987538544793e-06, + "loss": 0.545, "step": 4281 }, { - "epoch": 0.45, - "grad_norm": 2.090185818907428, - "learning_rate": 6.036279794356582e-06, - "loss": 0.6028, + "epoch": 0.3, + "grad_norm": 3.1566621188646162, + "learning_rate": 8.158984710572032e-06, + "loss": 0.5775, "step": 4282 }, { - "epoch": 0.45, - "grad_norm": 1.995346619697124, - "learning_rate": 6.034612500564479e-06, - "loss": 0.5208, + "epoch": 0.3, + "grad_norm": 2.2225292244686936, + "learning_rate": 8.158093868827857e-06, + "loss": 0.5192, "step": 4283 }, { - "epoch": 0.45, - "grad_norm": 2.7529881198497277, - "learning_rate": 6.032945086574085e-06, - "loss": 0.6228, + "epoch": 0.3, + "grad_norm": 1.3858511557648974, + "learning_rate": 8.157202860262463e-06, + "loss": 0.5349, "step": 4284 }, { - "epoch": 0.45, - "grad_norm": 3.3379083789144857, - "learning_rate": 6.0312775525791165e-06, - "loss": 0.6813, + "epoch": 0.3, + "grad_norm": 1.5757117334566655, + "learning_rate": 8.156311684922917e-06, + "loss": 0.542, "step": 4285 }, { - "epoch": 0.45, - "grad_norm": 2.119809893657813, - "learning_rate": 6.029609898773305e-06, - "loss": 0.6076, + "epoch": 0.3, + "grad_norm": 1.5637551674146335, + "learning_rate": 8.155420342856292e-06, + "loss": 0.5485, "step": 4286 }, { - "epoch": 0.45, - "grad_norm": 2.5052511733476788, - "learning_rate": 6.027942125350389e-06, - "loss": 0.6299, + "epoch": 0.3, + "grad_norm": 1.8961452206644958, + "learning_rate": 8.154528834109674e-06, + "loss": 0.507, "step": 4287 }, { - "epoch": 0.45, - "grad_norm": 2.655210831114237, - "learning_rate": 6.02627423250413e-06, - "loss": 0.6856, + "epoch": 0.3, + "grad_norm": 1.652404428830408, + "learning_rate": 8.153637158730151e-06, + "loss": 0.5513, "step": 4288 }, { - "epoch": 0.45, - "grad_norm": 2.625661240338276, - "learning_rate": 6.024606220428297e-06, - "loss": 0.647, + "epoch": 0.3, + "grad_norm": 1.6868275649135815, + "learning_rate": 8.152745316764829e-06, + "loss": 0.5195, "step": 4289 }, { - "epoch": 0.45, - "grad_norm": 3.2243027452718698, - "learning_rate": 6.022938089316677e-06, - "loss": 0.7011, + "epoch": 0.3, + "grad_norm": 1.5732812232491098, + "learning_rate": 8.151853308260816e-06, + "loss": 0.5821, "step": 4290 }, { - "epoch": 0.45, - "grad_norm": 2.3340700591649557, - "learning_rate": 6.021269839363063e-06, - "loss": 0.6817, + "epoch": 0.3, + "grad_norm": 1.533875686881365, + "learning_rate": 8.150961133265232e-06, + "loss": 0.5366, "step": 4291 }, { - "epoch": 0.45, - "grad_norm": 3.0637410458294068, - "learning_rate": 6.019601470761275e-06, - "loss": 0.668, + "epoch": 0.3, + "grad_norm": 1.4920170387571963, + "learning_rate": 8.150068791825202e-06, + "loss": 0.542, "step": 4292 }, { - "epoch": 0.45, - "grad_norm": 2.865208891942265, - "learning_rate": 6.017932983705132e-06, - "loss": 0.6142, + "epoch": 0.3, + "grad_norm": 1.769599622702397, + "learning_rate": 8.149176283987868e-06, + "loss": 0.6213, "step": 4293 }, { - "epoch": 0.45, - "grad_norm": 2.9027349130767828, - "learning_rate": 6.016264378388481e-06, - "loss": 0.6485, + "epoch": 0.3, + "grad_norm": 1.652577711270736, + "learning_rate": 8.148283609800369e-06, + "loss": 0.6398, "step": 4294 }, { - "epoch": 0.45, - "grad_norm": 2.9168453504696266, - "learning_rate": 6.0145956550051694e-06, - "loss": 0.6187, + "epoch": 0.3, + "grad_norm": 1.6387522681487596, + "learning_rate": 8.147390769309863e-06, + "loss": 0.4773, "step": 4295 }, { - "epoch": 0.45, - "grad_norm": 2.7032692104534073, - "learning_rate": 6.01292681374907e-06, - "loss": 0.6664, + "epoch": 0.3, + "grad_norm": 1.5373937683951593, + "learning_rate": 8.146497762563512e-06, + "loss": 0.5968, "step": 4296 }, { - "epoch": 0.45, - "grad_norm": 2.223566307096264, - "learning_rate": 6.01125785481406e-06, - "loss": 0.5642, + "epoch": 0.3, + "grad_norm": 1.5967383973748752, + "learning_rate": 8.145604589608487e-06, + "loss": 0.5781, "step": 4297 }, { - "epoch": 0.45, - "grad_norm": 3.504081111094006, - "learning_rate": 6.009588778394035e-06, - "loss": 0.6222, + "epoch": 0.3, + "grad_norm": 1.510701780990691, + "learning_rate": 8.14471125049197e-06, + "loss": 0.5184, "step": 4298 }, { - "epoch": 0.45, - "grad_norm": 1.8721849635784116, - "learning_rate": 6.0079195846829055e-06, - "loss": 0.629, + "epoch": 0.31, + "grad_norm": 1.5535092599365121, + "learning_rate": 8.143817745261147e-06, + "loss": 0.5519, "step": 4299 }, { - "epoch": 0.45, - "grad_norm": 1.9713414450562043, - "learning_rate": 6.006250273874591e-06, - "loss": 0.6071, + "epoch": 0.31, + "grad_norm": 0.7549774053823383, + "learning_rate": 8.142924073963219e-06, + "loss": 0.4817, "step": 4300 }, { - "epoch": 0.45, - "grad_norm": 2.0636910457431736, - "learning_rate": 6.0045808461630295e-06, - "loss": 0.606, + "epoch": 0.31, + "grad_norm": 1.8057355273154736, + "learning_rate": 8.142030236645391e-06, + "loss": 0.525, "step": 4301 }, { - "epoch": 0.45, - "grad_norm": 2.4935316363007756, - "learning_rate": 6.002911301742168e-06, - "loss": 0.6487, + "epoch": 0.31, + "grad_norm": 1.6917808065446482, + "learning_rate": 8.141136233354881e-06, + "loss": 0.6773, "step": 4302 }, { - "epoch": 0.45, - "grad_norm": 2.0612655080904037, - "learning_rate": 6.001241640805973e-06, - "loss": 0.6267, + "epoch": 0.31, + "grad_norm": 1.4648906775816386, + "learning_rate": 8.140242064138907e-06, + "loss": 0.5298, "step": 4303 }, { - "epoch": 0.45, - "grad_norm": 5.5302669242733815, - "learning_rate": 5.999571863548416e-06, - "loss": 0.6001, + "epoch": 0.31, + "grad_norm": 1.6324164427227914, + "learning_rate": 8.13934772904471e-06, + "loss": 0.5722, "step": 4304 }, { - "epoch": 0.45, - "grad_norm": 2.4210531577615226, - "learning_rate": 5.997901970163491e-06, - "loss": 0.6732, + "epoch": 0.31, + "grad_norm": 1.645317849089794, + "learning_rate": 8.13845322811953e-06, + "loss": 0.5837, "step": 4305 }, { - "epoch": 0.45, - "grad_norm": 2.4958888183227415, - "learning_rate": 5.996231960845198e-06, - "loss": 0.6682, + "epoch": 0.31, + "grad_norm": 1.5363764928897885, + "learning_rate": 8.137558561410615e-06, + "loss": 0.5659, "step": 4306 }, { - "epoch": 0.45, - "grad_norm": 2.5503369133292892, - "learning_rate": 5.994561835787558e-06, - "loss": 0.5959, + "epoch": 0.31, + "grad_norm": 1.883059435769436, + "learning_rate": 8.136663728965225e-06, + "loss": 0.5584, "step": 4307 }, { - "epoch": 0.45, - "grad_norm": 2.176063899739418, - "learning_rate": 5.992891595184596e-06, - "loss": 0.6718, + "epoch": 0.31, + "grad_norm": 1.4677561176895273, + "learning_rate": 8.135768730830627e-06, + "loss": 0.5098, "step": 4308 }, { - "epoch": 0.45, - "grad_norm": 2.4856814525301485, - "learning_rate": 5.991221239230362e-06, - "loss": 0.6848, + "epoch": 0.31, + "grad_norm": 1.7454917171214832, + "learning_rate": 8.1348735670541e-06, + "loss": 0.6005, "step": 4309 }, { - "epoch": 0.45, - "grad_norm": 2.966837942768863, - "learning_rate": 5.989550768118908e-06, - "loss": 0.5977, + "epoch": 0.31, + "grad_norm": 1.5148017788920904, + "learning_rate": 8.133978237682929e-06, + "loss": 0.5031, "step": 4310 }, { - "epoch": 0.45, - "grad_norm": 2.4789769096013554, - "learning_rate": 5.987880182044304e-06, - "loss": 0.5662, + "epoch": 0.31, + "grad_norm": 1.4807678415021261, + "learning_rate": 8.133082742764407e-06, + "loss": 0.5702, "step": 4311 }, { - "epoch": 0.45, - "grad_norm": 2.0922816492045966, - "learning_rate": 5.98620948120064e-06, - "loss": 0.7099, + "epoch": 0.31, + "grad_norm": 1.9473736359406637, + "learning_rate": 8.132187082345839e-06, + "loss": 0.537, "step": 4312 }, { - "epoch": 0.45, - "grad_norm": 2.144043338021814, - "learning_rate": 5.984538665782007e-06, - "loss": 0.6219, + "epoch": 0.31, + "grad_norm": 1.482190164618764, + "learning_rate": 8.131291256474536e-06, + "loss": 0.4469, "step": 4313 }, { - "epoch": 0.45, - "grad_norm": 1.9684756423653305, - "learning_rate": 5.9828677359825196e-06, - "loss": 0.6493, + "epoch": 0.31, + "grad_norm": 1.4793252157669619, + "learning_rate": 8.130395265197819e-06, + "loss": 0.5628, "step": 4314 }, { - "epoch": 0.45, - "grad_norm": 2.3925762779344515, - "learning_rate": 5.981196691996298e-06, - "loss": 0.5853, + "epoch": 0.31, + "grad_norm": 1.830228702671688, + "learning_rate": 8.129499108563016e-06, + "loss": 0.6829, "step": 4315 }, { - "epoch": 0.45, - "grad_norm": 2.9740384297980214, - "learning_rate": 5.9795255340174825e-06, - "loss": 0.656, + "epoch": 0.31, + "grad_norm": 1.8170147770072045, + "learning_rate": 8.128602786617468e-06, + "loss": 0.536, "step": 4316 }, { - "epoch": 0.45, - "grad_norm": 2.1922968760567727, - "learning_rate": 5.9778542622402205e-06, - "loss": 0.5823, + "epoch": 0.31, + "grad_norm": 1.6314135647465575, + "learning_rate": 8.127706299408518e-06, + "loss": 0.6166, "step": 4317 }, { - "epoch": 0.45, - "grad_norm": 2.08742766373127, - "learning_rate": 5.976182876858679e-06, - "loss": 0.6744, + "epoch": 0.31, + "grad_norm": 1.6036639706001015, + "learning_rate": 8.126809646983522e-06, + "loss": 0.5969, "step": 4318 }, { - "epoch": 0.45, - "grad_norm": 2.4973148138070638, - "learning_rate": 5.9745113780670305e-06, - "loss": 0.6447, + "epoch": 0.31, + "grad_norm": 1.5116670755484711, + "learning_rate": 8.125912829389848e-06, + "loss": 0.5258, "step": 4319 }, { - "epoch": 0.45, - "grad_norm": 2.6221490595037755, - "learning_rate": 5.972839766059469e-06, - "loss": 0.6632, + "epoch": 0.31, + "grad_norm": 1.593244433711856, + "learning_rate": 8.125015846674864e-06, + "loss": 0.5569, "step": 4320 }, { - "epoch": 0.45, - "grad_norm": 2.182690583898645, - "learning_rate": 5.971168041030194e-06, - "loss": 0.7078, + "epoch": 0.31, + "grad_norm": 1.795551469012507, + "learning_rate": 8.124118698885955e-06, + "loss": 0.5996, "step": 4321 }, { - "epoch": 0.45, - "grad_norm": 1.984007580074134, - "learning_rate": 5.969496203173424e-06, - "loss": 0.5901, + "epoch": 0.31, + "grad_norm": 1.8392142941702296, + "learning_rate": 8.12322138607051e-06, + "loss": 0.5044, "step": 4322 }, { - "epoch": 0.45, - "grad_norm": 3.2119980194558324, - "learning_rate": 5.967824252683389e-06, - "loss": 0.6961, + "epoch": 0.31, + "grad_norm": 1.4514057962638305, + "learning_rate": 8.12232390827593e-06, + "loss": 0.5987, "step": 4323 }, { - "epoch": 0.46, - "grad_norm": 1.9424736990101, - "learning_rate": 5.9661521897543276e-06, - "loss": 0.5905, + "epoch": 0.31, + "grad_norm": 1.7045681567609239, + "learning_rate": 8.121426265549619e-06, + "loss": 0.575, "step": 4324 }, { - "epoch": 0.46, - "grad_norm": 2.3950357257174666, - "learning_rate": 5.9644800145805e-06, - "loss": 0.6498, + "epoch": 0.31, + "grad_norm": 2.200218573753405, + "learning_rate": 8.120528457938998e-06, + "loss": 0.4767, "step": 4325 }, { - "epoch": 0.46, - "grad_norm": 2.9016254383942606, - "learning_rate": 5.962807727356169e-06, - "loss": 0.7386, + "epoch": 0.31, + "grad_norm": 1.6328214606599796, + "learning_rate": 8.119630485491487e-06, + "loss": 0.5858, "step": 4326 }, { - "epoch": 0.46, - "grad_norm": 2.187079791651539, - "learning_rate": 5.9611353282756235e-06, - "loss": 0.6256, + "epoch": 0.31, + "grad_norm": 2.278147380065056, + "learning_rate": 8.118732348254525e-06, + "loss": 0.5181, "step": 4327 }, { - "epoch": 0.46, - "grad_norm": 3.203908956516435, - "learning_rate": 5.95946281753315e-06, - "loss": 0.5533, + "epoch": 0.31, + "grad_norm": 1.4936285391255903, + "learning_rate": 8.117834046275555e-06, + "loss": 0.5234, "step": 4328 }, { - "epoch": 0.46, - "grad_norm": 2.3206237982379285, - "learning_rate": 5.957790195323064e-06, - "loss": 0.5879, + "epoch": 0.31, + "grad_norm": 1.867204636648518, + "learning_rate": 8.116935579602022e-06, + "loss": 0.5721, "step": 4329 }, { - "epoch": 0.46, - "grad_norm": 2.0758013751273796, - "learning_rate": 5.956117461839679e-06, - "loss": 0.7067, + "epoch": 0.31, + "grad_norm": 1.749089005601295, + "learning_rate": 8.116036948281392e-06, + "loss": 0.5434, "step": 4330 }, { - "epoch": 0.46, - "grad_norm": 2.9307153953571254, - "learning_rate": 5.954444617277332e-06, - "loss": 0.6043, + "epoch": 0.31, + "grad_norm": 1.6525435481599546, + "learning_rate": 8.115138152361132e-06, + "loss": 0.4935, "step": 4331 }, { - "epoch": 0.46, - "grad_norm": 3.1653578493636227, - "learning_rate": 5.952771661830368e-06, - "loss": 0.6076, + "epoch": 0.31, + "grad_norm": 1.5894650705243134, + "learning_rate": 8.11423919188872e-06, + "loss": 0.5943, "step": 4332 }, { - "epoch": 0.46, - "grad_norm": 2.835128332613066, - "learning_rate": 5.951098595693146e-06, - "loss": 0.5411, + "epoch": 0.31, + "grad_norm": 1.7466673496420493, + "learning_rate": 8.113340066911639e-06, + "loss": 0.5176, "step": 4333 }, { - "epoch": 0.46, - "grad_norm": 2.314934556444207, - "learning_rate": 5.9494254190600395e-06, - "loss": 0.7049, + "epoch": 0.31, + "grad_norm": 1.5762692869169663, + "learning_rate": 8.112440777477388e-06, + "loss": 0.5586, "step": 4334 }, { - "epoch": 0.46, - "grad_norm": 2.742652891296002, - "learning_rate": 5.947752132125432e-06, - "loss": 0.6817, + "epoch": 0.31, + "grad_norm": 1.7492268742241084, + "learning_rate": 8.111541323633467e-06, + "loss": 0.6556, "step": 4335 }, { - "epoch": 0.46, - "grad_norm": 2.3350065294888402, - "learning_rate": 5.946078735083723e-06, - "loss": 0.6664, + "epoch": 0.31, + "grad_norm": 1.7231519079123618, + "learning_rate": 8.11064170542739e-06, + "loss": 0.6105, "step": 4336 }, { - "epoch": 0.46, - "grad_norm": 2.878910527625717, - "learning_rate": 5.944405228129318e-06, - "loss": 0.6108, + "epoch": 0.31, + "grad_norm": 1.7867314867887323, + "learning_rate": 8.10974192290668e-06, + "loss": 0.5518, "step": 4337 }, { - "epoch": 0.46, - "grad_norm": 2.8697645174477455, - "learning_rate": 5.942731611456647e-06, - "loss": 0.6308, + "epoch": 0.31, + "grad_norm": 1.587267110552621, + "learning_rate": 8.108841976118864e-06, + "loss": 0.6104, "step": 4338 }, { - "epoch": 0.46, - "grad_norm": 3.8193425849051312, - "learning_rate": 5.941057885260141e-06, - "loss": 0.6536, + "epoch": 0.31, + "grad_norm": 1.7486084697077362, + "learning_rate": 8.10794186511148e-06, + "loss": 0.559, "step": 4339 }, { - "epoch": 0.46, - "grad_norm": 2.3962102257159876, - "learning_rate": 5.939384049734252e-06, - "loss": 0.6428, + "epoch": 0.31, + "grad_norm": 1.7905809751171622, + "learning_rate": 8.107041589932076e-06, + "loss": 0.5556, "step": 4340 }, { - "epoch": 0.46, - "grad_norm": 1.897199349105444, - "learning_rate": 5.937710105073436e-06, - "loss": 0.6204, + "epoch": 0.31, + "grad_norm": 1.4963486323622954, + "learning_rate": 8.106141150628207e-06, + "loss": 0.5758, "step": 4341 }, { - "epoch": 0.46, - "grad_norm": 4.592219177725684, - "learning_rate": 5.936036051472173e-06, - "loss": 0.6533, + "epoch": 0.31, + "grad_norm": 2.119541546899883, + "learning_rate": 8.105240547247438e-06, + "loss": 0.537, "step": 4342 }, { - "epoch": 0.46, - "grad_norm": 2.856840556341079, - "learning_rate": 5.934361889124946e-06, - "loss": 0.6482, + "epoch": 0.31, + "grad_norm": 1.5021880904208744, + "learning_rate": 8.104339779837339e-06, + "loss": 0.5884, "step": 4343 }, { - "epoch": 0.46, - "grad_norm": 3.8920368478735567, - "learning_rate": 5.9326876182262575e-06, - "loss": 0.6338, + "epoch": 0.31, + "grad_norm": 1.796368598152394, + "learning_rate": 8.103438848445493e-06, + "loss": 0.519, "step": 4344 }, { - "epoch": 0.46, - "grad_norm": 2.3189541995763316, - "learning_rate": 5.931013238970616e-06, - "loss": 0.6674, + "epoch": 0.31, + "grad_norm": 1.611578343880975, + "learning_rate": 8.102537753119494e-06, + "loss": 0.5802, "step": 4345 }, { - "epoch": 0.46, - "grad_norm": 3.1842357570851374, - "learning_rate": 5.929338751552549e-06, - "loss": 0.6539, + "epoch": 0.31, + "grad_norm": 1.6164351108482622, + "learning_rate": 8.101636493906938e-06, + "loss": 0.5571, "step": 4346 }, { - "epoch": 0.46, - "grad_norm": 2.412864761506081, - "learning_rate": 5.927664156166592e-06, - "loss": 0.6452, + "epoch": 0.31, + "grad_norm": 1.862393924407118, + "learning_rate": 8.100735070855432e-06, + "loss": 0.5548, "step": 4347 }, { - "epoch": 0.46, - "grad_norm": 3.450085408288252, - "learning_rate": 5.925989453007294e-06, - "loss": 0.5489, + "epoch": 0.31, + "grad_norm": 2.7323161553217568, + "learning_rate": 8.099833484012592e-06, + "loss": 0.5722, "step": 4348 }, { - "epoch": 0.46, - "grad_norm": 2.413183425668527, - "learning_rate": 5.924314642269219e-06, - "loss": 0.5971, + "epoch": 0.31, + "grad_norm": 1.636104383140665, + "learning_rate": 8.098931733426045e-06, + "loss": 0.5375, "step": 4349 }, { - "epoch": 0.46, - "grad_norm": 2.077588646518163, - "learning_rate": 5.922639724146939e-06, - "loss": 0.6276, + "epoch": 0.31, + "grad_norm": 1.4735569546738667, + "learning_rate": 8.098029819143422e-06, + "loss": 0.5773, "step": 4350 }, { - "epoch": 0.46, - "grad_norm": 2.440785017604606, - "learning_rate": 5.920964698835047e-06, - "loss": 0.6674, + "epoch": 0.31, + "grad_norm": 1.5910346449706478, + "learning_rate": 8.097127741212369e-06, + "loss": 0.5564, "step": 4351 }, { - "epoch": 0.46, - "grad_norm": 3.242550745644832, - "learning_rate": 5.919289566528135e-06, - "loss": 0.5893, + "epoch": 0.31, + "grad_norm": 0.9208147353501052, + "learning_rate": 8.096225499680532e-06, + "loss": 0.4491, "step": 4352 }, { - "epoch": 0.46, - "grad_norm": 2.571180994234153, - "learning_rate": 5.9176143274208185e-06, - "loss": 0.7091, + "epoch": 0.31, + "grad_norm": 1.5388194020900825, + "learning_rate": 8.095323094595574e-06, + "loss": 0.5474, "step": 4353 }, { - "epoch": 0.46, - "grad_norm": 2.4118573325477386, - "learning_rate": 5.915938981707724e-06, - "loss": 0.6622, + "epoch": 0.31, + "grad_norm": 1.7951108183032523, + "learning_rate": 8.09442052600516e-06, + "loss": 0.5478, "step": 4354 }, { - "epoch": 0.46, - "grad_norm": 2.5558862540573424, - "learning_rate": 5.914263529583485e-06, - "loss": 0.7812, + "epoch": 0.31, + "grad_norm": 1.818974541988249, + "learning_rate": 8.09351779395697e-06, + "loss": 0.5918, "step": 4355 }, { - "epoch": 0.46, - "grad_norm": 2.2737963351658887, - "learning_rate": 5.9125879712427525e-06, - "loss": 0.5881, + "epoch": 0.31, + "grad_norm": 0.7267951740901497, + "learning_rate": 8.092614898498685e-06, + "loss": 0.4505, "step": 4356 }, { - "epoch": 0.46, - "grad_norm": 2.4431483934766702, - "learning_rate": 5.9109123068801875e-06, - "loss": 0.7217, + "epoch": 0.31, + "grad_norm": 1.768425729472655, + "learning_rate": 8.091711839678005e-06, + "loss": 0.5641, "step": 4357 }, { - "epoch": 0.46, - "grad_norm": 2.3370688862721347, - "learning_rate": 5.909236536690464e-06, - "loss": 0.6663, + "epoch": 0.31, + "grad_norm": 1.5821051335137488, + "learning_rate": 8.090808617542629e-06, + "loss": 0.6276, "step": 4358 }, { - "epoch": 0.46, - "grad_norm": 2.3073085136490574, - "learning_rate": 5.907560660868266e-06, - "loss": 0.6542, + "epoch": 0.31, + "grad_norm": 0.8155148311949443, + "learning_rate": 8.089905232140268e-06, + "loss": 0.4658, "step": 4359 }, { - "epoch": 0.46, - "grad_norm": 3.2027134863893343, - "learning_rate": 5.905884679608297e-06, - "loss": 0.6156, + "epoch": 0.31, + "grad_norm": 1.5612878546186217, + "learning_rate": 8.089001683518642e-06, + "loss": 0.5875, "step": 4360 }, { - "epoch": 0.46, - "grad_norm": 2.5426901682583063, - "learning_rate": 5.904208593105263e-06, - "loss": 0.7358, + "epoch": 0.31, + "grad_norm": 1.8826165154795167, + "learning_rate": 8.088097971725482e-06, + "loss": 0.5548, "step": 4361 }, { - "epoch": 0.46, - "grad_norm": 3.4513894931625066, - "learning_rate": 5.902532401553888e-06, - "loss": 0.6191, + "epoch": 0.31, + "grad_norm": 1.9856631800558473, + "learning_rate": 8.087194096808522e-06, + "loss": 0.6211, "step": 4362 }, { - "epoch": 0.46, - "grad_norm": 2.4589931992350498, - "learning_rate": 5.900856105148908e-06, - "loss": 0.6734, + "epoch": 0.31, + "grad_norm": 0.8474577998917612, + "learning_rate": 8.086290058815508e-06, + "loss": 0.4643, "step": 4363 }, { - "epoch": 0.46, - "grad_norm": 2.33735601801206, - "learning_rate": 5.899179704085072e-06, - "loss": 0.6383, + "epoch": 0.31, + "grad_norm": 0.8488359917748365, + "learning_rate": 8.085385857794197e-06, + "loss": 0.4545, "step": 4364 }, { - "epoch": 0.46, - "grad_norm": 2.153407332381854, - "learning_rate": 5.897503198557134e-06, - "loss": 0.6444, + "epoch": 0.31, + "grad_norm": 1.8036750475634358, + "learning_rate": 8.08448149379235e-06, + "loss": 0.5708, "step": 4365 }, { - "epoch": 0.46, - "grad_norm": 2.8332048431773433, - "learning_rate": 5.89582658875987e-06, - "loss": 0.5908, + "epoch": 0.31, + "grad_norm": 1.7630502251520652, + "learning_rate": 8.083576966857737e-06, + "loss": 0.5249, "step": 4366 }, { - "epoch": 0.46, - "grad_norm": 2.8707757219917687, - "learning_rate": 5.8941498748880635e-06, - "loss": 0.6854, + "epoch": 0.31, + "grad_norm": 1.4118149716486545, + "learning_rate": 8.082672277038141e-06, + "loss": 0.4731, "step": 4367 }, { - "epoch": 0.46, - "grad_norm": 3.80342904637755, - "learning_rate": 5.892473057136508e-06, - "loss": 0.5482, + "epoch": 0.31, + "grad_norm": 1.90153931437648, + "learning_rate": 8.081767424381353e-06, + "loss": 0.5982, "step": 4368 }, { - "epoch": 0.46, - "grad_norm": 1.0613535298316306, - "learning_rate": 5.890796135700013e-06, - "loss": 0.607, + "epoch": 0.31, + "grad_norm": 1.768580922152759, + "learning_rate": 8.080862408935164e-06, + "loss": 0.5875, "step": 4369 }, { - "epoch": 0.46, - "grad_norm": 4.544050244229915, - "learning_rate": 5.889119110773398e-06, - "loss": 0.6613, + "epoch": 0.31, + "grad_norm": 1.8480290677550952, + "learning_rate": 8.079957230747384e-06, + "loss": 0.5581, "step": 4370 }, { - "epoch": 0.46, - "grad_norm": 2.225190268668775, - "learning_rate": 5.887441982551495e-06, - "loss": 0.684, + "epoch": 0.31, + "grad_norm": 1.6468822816223843, + "learning_rate": 8.079051889865826e-06, + "loss": 0.5592, "step": 4371 }, { - "epoch": 0.46, - "grad_norm": 2.406353835293022, - "learning_rate": 5.885764751229146e-06, - "loss": 0.5922, + "epoch": 0.31, + "grad_norm": 1.9295632248382428, + "learning_rate": 8.078146386338314e-06, + "loss": 0.5876, "step": 4372 }, { - "epoch": 0.46, - "grad_norm": 2.660963513951452, - "learning_rate": 5.884087417001212e-06, - "loss": 0.6724, + "epoch": 0.31, + "grad_norm": 2.2475130067083153, + "learning_rate": 8.07724072021268e-06, + "loss": 0.6384, "step": 4373 }, { - "epoch": 0.46, - "grad_norm": 2.151920515191103, - "learning_rate": 5.882409980062554e-06, - "loss": 0.6619, + "epoch": 0.31, + "grad_norm": 1.589495721815848, + "learning_rate": 8.076334891536765e-06, + "loss": 0.4819, "step": 4374 }, { - "epoch": 0.46, - "grad_norm": 4.056944032442784, - "learning_rate": 5.880732440608059e-06, - "loss": 0.689, + "epoch": 0.31, + "grad_norm": 1.753806853920163, + "learning_rate": 8.075428900358415e-06, + "loss": 0.5481, "step": 4375 }, { - "epoch": 0.46, - "grad_norm": 2.8201326385968266, - "learning_rate": 5.879054798832612e-06, - "loss": 0.6328, + "epoch": 0.31, + "grad_norm": 1.7183345256685902, + "learning_rate": 8.07452274672549e-06, + "loss": 0.4583, "step": 4376 }, { - "epoch": 0.46, - "grad_norm": 1.9966584556095535, - "learning_rate": 5.877377054931122e-06, - "loss": 0.6494, + "epoch": 0.31, + "grad_norm": 1.7499279176139844, + "learning_rate": 8.073616430685858e-06, + "loss": 0.5841, "step": 4377 }, { - "epoch": 0.46, - "grad_norm": 2.3102019347667095, - "learning_rate": 5.8756992090985e-06, - "loss": 0.6443, + "epoch": 0.31, + "grad_norm": 1.6518687474590232, + "learning_rate": 8.072709952287388e-06, + "loss": 0.6104, "step": 4378 }, { - "epoch": 0.46, - "grad_norm": 2.7187452579507503, - "learning_rate": 5.874021261529675e-06, - "loss": 0.778, + "epoch": 0.31, + "grad_norm": 1.589037743724401, + "learning_rate": 8.071803311577969e-06, + "loss": 0.5922, "step": 4379 }, { - "epoch": 0.46, - "grad_norm": 3.9391200420774966, - "learning_rate": 5.872343212419589e-06, - "loss": 0.7402, + "epoch": 0.31, + "grad_norm": 1.8547583633356861, + "learning_rate": 8.070896508605489e-06, + "loss": 0.5578, "step": 4380 }, { - "epoch": 0.46, - "grad_norm": 3.342650913183981, - "learning_rate": 5.870665061963188e-06, - "loss": 0.6401, + "epoch": 0.31, + "grad_norm": 1.892417939428766, + "learning_rate": 8.06998954341785e-06, + "loss": 0.5514, "step": 4381 }, { - "epoch": 0.46, - "grad_norm": 3.10923752534272, - "learning_rate": 5.868986810355437e-06, - "loss": 0.6497, + "epoch": 0.31, + "grad_norm": 1.6892545380800559, + "learning_rate": 8.069082416062961e-06, + "loss": 0.5928, "step": 4382 }, { - "epoch": 0.46, - "grad_norm": 2.9439328630070154, - "learning_rate": 5.867308457791311e-06, - "loss": 0.6287, + "epoch": 0.31, + "grad_norm": 1.951256835133891, + "learning_rate": 8.06817512658874e-06, + "loss": 0.5442, "step": 4383 }, { - "epoch": 0.46, - "grad_norm": 2.1724951171650644, - "learning_rate": 5.865630004465796e-06, - "loss": 0.6773, + "epoch": 0.31, + "grad_norm": 1.8286888663281617, + "learning_rate": 8.067267675043112e-06, + "loss": 0.5861, "step": 4384 }, { - "epoch": 0.46, - "grad_norm": 2.2545948900225095, - "learning_rate": 5.8639514505738885e-06, - "loss": 0.7132, + "epoch": 0.31, + "grad_norm": 1.5929972160664623, + "learning_rate": 8.066360061474013e-06, + "loss": 0.6063, "step": 4385 }, { - "epoch": 0.46, - "grad_norm": 2.339764871054603, - "learning_rate": 5.8622727963106e-06, - "loss": 0.6558, + "epoch": 0.31, + "grad_norm": 1.5943437337730584, + "learning_rate": 8.065452285929383e-06, + "loss": 0.5719, "step": 4386 }, { - "epoch": 0.46, - "grad_norm": 6.264396932081793, - "learning_rate": 5.860594041870948e-06, - "loss": 0.564, + "epoch": 0.31, + "grad_norm": 1.706720778916542, + "learning_rate": 8.064544348457177e-06, + "loss": 0.5786, "step": 4387 }, { - "epoch": 0.46, - "grad_norm": 3.0726383532297126, - "learning_rate": 5.85891518744997e-06, - "loss": 0.6943, + "epoch": 0.31, + "grad_norm": 1.705566562712168, + "learning_rate": 8.063636249105355e-06, + "loss": 0.5557, "step": 4388 }, { - "epoch": 0.46, - "grad_norm": 3.819599638111385, - "learning_rate": 5.857236233242709e-06, - "loss": 0.6919, + "epoch": 0.31, + "grad_norm": 1.4742874422019507, + "learning_rate": 8.062727987921886e-06, + "loss": 0.5453, "step": 4389 }, { - "epoch": 0.46, - "grad_norm": 2.55942614466353, - "learning_rate": 5.855557179444219e-06, - "loss": 0.6243, + "epoch": 0.31, + "grad_norm": 0.8356546737431582, + "learning_rate": 8.061819564954745e-06, + "loss": 0.4287, "step": 4390 }, { - "epoch": 0.46, - "grad_norm": 2.1630742426007186, - "learning_rate": 5.8538780262495695e-06, - "loss": 0.59, + "epoch": 0.31, + "grad_norm": 2.0457617860905035, + "learning_rate": 8.06091098025192e-06, + "loss": 0.5756, "step": 4391 }, { - "epoch": 0.46, - "grad_norm": 2.6158551871983122, - "learning_rate": 5.85219877385384e-06, - "loss": 0.7309, + "epoch": 0.31, + "grad_norm": 1.8597639148312166, + "learning_rate": 8.060002233861405e-06, + "loss": 0.5501, "step": 4392 }, { - "epoch": 0.46, - "grad_norm": 2.212409520666976, - "learning_rate": 5.8505194224521204e-06, - "loss": 0.5875, + "epoch": 0.31, + "grad_norm": 1.8253868861527018, + "learning_rate": 8.059093325831204e-06, + "loss": 0.6278, "step": 4393 }, { - "epoch": 0.46, - "grad_norm": 2.3813643477613318, - "learning_rate": 5.848839972239512e-06, - "loss": 0.6635, + "epoch": 0.31, + "grad_norm": 1.6449548680498411, + "learning_rate": 8.05818425620933e-06, + "loss": 0.4996, "step": 4394 }, { - "epoch": 0.46, - "grad_norm": 2.4275155873597285, - "learning_rate": 5.847160423411129e-06, - "loss": 0.6709, + "epoch": 0.31, + "grad_norm": 1.6376573736783098, + "learning_rate": 8.057275025043798e-06, + "loss": 0.4654, "step": 4395 }, { - "epoch": 0.46, - "grad_norm": 4.826967263615847, - "learning_rate": 5.845480776162097e-06, - "loss": 0.6407, + "epoch": 0.31, + "grad_norm": 1.8077132450217883, + "learning_rate": 8.05636563238264e-06, + "loss": 0.5387, "step": 4396 }, { - "epoch": 0.46, - "grad_norm": 2.306596082817607, - "learning_rate": 5.843801030687555e-06, - "loss": 0.6385, + "epoch": 0.31, + "grad_norm": 2.204257323010359, + "learning_rate": 8.055456078273894e-06, + "loss": 0.4928, "step": 4397 }, { - "epoch": 0.46, - "grad_norm": 1.9985283159296727, - "learning_rate": 5.842121187182644e-06, - "loss": 0.6064, + "epoch": 0.31, + "grad_norm": 1.851052738382041, + "learning_rate": 8.054546362765605e-06, + "loss": 0.5683, "step": 4398 }, { - "epoch": 0.46, - "grad_norm": 3.2091727302195725, - "learning_rate": 5.840441245842532e-06, - "loss": 0.74, + "epoch": 0.31, + "grad_norm": 1.5655120763307142, + "learning_rate": 8.053636485905824e-06, + "loss": 0.5768, "step": 4399 }, { - "epoch": 0.46, - "grad_norm": 2.8692803940553713, - "learning_rate": 5.838761206862382e-06, - "loss": 0.6428, + "epoch": 0.31, + "grad_norm": 2.3229627290818193, + "learning_rate": 8.05272644774262e-06, + "loss": 0.5171, "step": 4400 }, { - "epoch": 0.46, - "grad_norm": 2.125632738179534, - "learning_rate": 5.837081070437383e-06, - "loss": 0.7399, + "epoch": 0.31, + "grad_norm": 1.6653012427154155, + "learning_rate": 8.05181624832406e-06, + "loss": 0.5744, "step": 4401 }, { - "epoch": 0.46, - "grad_norm": 3.397361281528623, - "learning_rate": 5.835400836762723e-06, - "loss": 0.6768, + "epoch": 0.31, + "grad_norm": 1.5920469117217915, + "learning_rate": 8.050905887698225e-06, + "loss": 0.5128, "step": 4402 }, { - "epoch": 0.46, - "grad_norm": 4.962196122812087, - "learning_rate": 5.833720506033609e-06, - "loss": 0.5604, + "epoch": 0.31, + "grad_norm": 1.6416871497745171, + "learning_rate": 8.0499953659132e-06, + "loss": 0.5976, "step": 4403 }, { - "epoch": 0.46, - "grad_norm": 2.4524501870949416, - "learning_rate": 5.8320400784452566e-06, - "loss": 0.6212, + "epoch": 0.31, + "grad_norm": 1.5054131978557324, + "learning_rate": 8.049084683017089e-06, + "loss": 0.5857, "step": 4404 }, { - "epoch": 0.46, - "grad_norm": 2.242962121068071, - "learning_rate": 5.830359554192894e-06, - "loss": 0.6391, + "epoch": 0.31, + "grad_norm": 1.6527843329747083, + "learning_rate": 8.048173839057993e-06, + "loss": 0.5786, "step": 4405 }, { - "epoch": 0.46, - "grad_norm": 2.4880079804921844, - "learning_rate": 5.828678933471758e-06, - "loss": 0.6894, + "epoch": 0.31, + "grad_norm": 1.631687645263775, + "learning_rate": 8.047262834084025e-06, + "loss": 0.5489, "step": 4406 }, { - "epoch": 0.46, - "grad_norm": 0.9982078926059621, - "learning_rate": 5.826998216477097e-06, - "loss": 0.5715, + "epoch": 0.31, + "grad_norm": 2.017297972381213, + "learning_rate": 8.046351668143308e-06, + "loss": 0.609, "step": 4407 }, { - "epoch": 0.46, - "grad_norm": 3.39200422891883, - "learning_rate": 5.825317403404177e-06, - "loss": 0.7124, + "epoch": 0.31, + "grad_norm": 1.4032622610033698, + "learning_rate": 8.045440341283974e-06, + "loss": 0.5441, "step": 4408 }, { - "epoch": 0.46, - "grad_norm": 2.5758571917643183, - "learning_rate": 5.823636494448265e-06, - "loss": 0.6916, + "epoch": 0.31, + "grad_norm": 1.8902849389334295, + "learning_rate": 8.044528853554163e-06, + "loss": 0.5373, "step": 4409 }, { - "epoch": 0.46, - "grad_norm": 2.0155720379239783, - "learning_rate": 5.821955489804647e-06, - "loss": 0.5974, + "epoch": 0.31, + "grad_norm": 2.3570115539865464, + "learning_rate": 8.043617205002021e-06, + "loss": 0.558, "step": 4410 }, { - "epoch": 0.46, - "grad_norm": 2.4690764237144096, - "learning_rate": 5.820274389668614e-06, - "loss": 0.6291, + "epoch": 0.31, + "grad_norm": 1.508358673101007, + "learning_rate": 8.042705395675706e-06, + "loss": 0.4549, "step": 4411 }, { - "epoch": 0.46, - "grad_norm": 2.4569169908883968, - "learning_rate": 5.818593194235475e-06, - "loss": 0.675, + "epoch": 0.31, + "grad_norm": 1.6710909172106094, + "learning_rate": 8.041793425623379e-06, + "loss": 0.5376, "step": 4412 }, { - "epoch": 0.46, - "grad_norm": 2.0672128706646213, - "learning_rate": 5.816911903700546e-06, - "loss": 0.6804, + "epoch": 0.31, + "grad_norm": 1.5440937493416225, + "learning_rate": 8.040881294893219e-06, + "loss": 0.5342, "step": 4413 }, { - "epoch": 0.46, - "grad_norm": 1.9976719347095646, - "learning_rate": 5.815230518259153e-06, - "loss": 0.6844, + "epoch": 0.31, + "grad_norm": 4.5091530763210494, + "learning_rate": 8.039969003533402e-06, + "loss": 0.577, "step": 4414 }, { - "epoch": 0.46, - "grad_norm": 2.564923614303906, - "learning_rate": 5.813549038106635e-06, - "loss": 0.6927, + "epoch": 0.31, + "grad_norm": 1.7801560158150778, + "learning_rate": 8.039056551592126e-06, + "loss": 0.552, "step": 4415 }, { - "epoch": 0.46, - "grad_norm": 2.3811759370101084, - "learning_rate": 5.811867463438341e-06, - "loss": 0.628, + "epoch": 0.31, + "grad_norm": 1.7719604380705163, + "learning_rate": 8.038143939117581e-06, + "loss": 0.5725, "step": 4416 }, { - "epoch": 0.46, - "grad_norm": 2.0628079283250282, - "learning_rate": 5.810185794449633e-06, - "loss": 0.5905, + "epoch": 0.31, + "grad_norm": 1.6638886352313746, + "learning_rate": 8.037231166157982e-06, + "loss": 0.5585, "step": 4417 }, { - "epoch": 0.46, - "grad_norm": 2.582538159728672, - "learning_rate": 5.80850403133588e-06, - "loss": 0.7048, + "epoch": 0.31, + "grad_norm": 1.6510351447302818, + "learning_rate": 8.036318232761537e-06, + "loss": 0.4978, "step": 4418 }, { - "epoch": 0.47, - "grad_norm": 2.4157594950729466, - "learning_rate": 5.806822174292467e-06, - "loss": 0.5557, + "epoch": 0.31, + "grad_norm": 1.871503231040386, + "learning_rate": 8.035405138976478e-06, + "loss": 0.5767, "step": 4419 }, { - "epoch": 0.47, - "grad_norm": 0.990310949015315, - "learning_rate": 5.805140223514785e-06, - "loss": 0.5572, + "epoch": 0.31, + "grad_norm": 1.555416722844969, + "learning_rate": 8.034491884851033e-06, + "loss": 0.5659, "step": 4420 }, { - "epoch": 0.47, - "grad_norm": 3.0489038974046543, - "learning_rate": 5.80345817919824e-06, - "loss": 0.6249, + "epoch": 0.31, + "grad_norm": 3.0922247352723184, + "learning_rate": 8.033578470433446e-06, + "loss": 0.5018, "step": 4421 }, { - "epoch": 0.47, - "grad_norm": 2.060292665650145, - "learning_rate": 5.801776041538245e-06, - "loss": 0.6035, + "epoch": 0.31, + "grad_norm": 1.2813486735182682, + "learning_rate": 8.032664895771962e-06, + "loss": 0.4989, "step": 4422 }, { - "epoch": 0.47, - "grad_norm": 2.091882121485412, - "learning_rate": 5.800093810730229e-06, - "loss": 0.6245, + "epoch": 0.31, + "grad_norm": 1.8741227824420426, + "learning_rate": 8.031751160914843e-06, + "loss": 0.6166, "step": 4423 }, { - "epoch": 0.47, - "grad_norm": 3.179974949588236, - "learning_rate": 5.798411486969626e-06, - "loss": 0.6589, + "epoch": 0.31, + "grad_norm": 1.5000998441967948, + "learning_rate": 8.030837265910357e-06, + "loss": 0.6041, "step": 4424 }, { - "epoch": 0.47, - "grad_norm": 2.138657402068139, - "learning_rate": 5.796729070451884e-06, - "loss": 0.6545, + "epoch": 0.31, + "grad_norm": 2.129383033356509, + "learning_rate": 8.029923210806774e-06, + "loss": 0.5392, "step": 4425 }, { - "epoch": 0.47, - "grad_norm": 2.233621588999477, - "learning_rate": 5.795046561372463e-06, - "loss": 0.681, + "epoch": 0.31, + "grad_norm": 1.8130086667318466, + "learning_rate": 8.029008995652382e-06, + "loss": 0.5489, "step": 4426 }, { - "epoch": 0.47, - "grad_norm": 3.004996496095365, - "learning_rate": 5.79336395992683e-06, - "loss": 0.6399, + "epoch": 0.31, + "grad_norm": 1.3792791624346519, + "learning_rate": 8.02809462049547e-06, + "loss": 0.5078, "step": 4427 }, { - "epoch": 0.47, - "grad_norm": 2.069729876929192, - "learning_rate": 5.791681266310465e-06, - "loss": 0.6049, + "epoch": 0.31, + "grad_norm": 1.6996148349689444, + "learning_rate": 8.02718008538434e-06, + "loss": 0.5372, "step": 4428 }, { - "epoch": 0.47, - "grad_norm": 2.4465891459726623, - "learning_rate": 5.78999848071886e-06, - "loss": 0.6035, + "epoch": 0.31, + "grad_norm": 2.1900426952872643, + "learning_rate": 8.026265390367301e-06, + "loss": 0.5019, "step": 4429 }, { - "epoch": 0.47, - "grad_norm": 2.2899552504847716, - "learning_rate": 5.788315603347515e-06, - "loss": 0.6646, + "epoch": 0.31, + "grad_norm": 2.593723290340787, + "learning_rate": 8.025350535492668e-06, + "loss": 0.5334, "step": 4430 }, { - "epoch": 0.47, - "grad_norm": 2.750752115170631, - "learning_rate": 5.78663263439194e-06, - "loss": 0.6953, + "epoch": 0.31, + "grad_norm": 10.44624339850832, + "learning_rate": 8.024435520808771e-06, + "loss": 0.5334, "step": 4431 }, { - "epoch": 0.47, - "grad_norm": 3.2813221898068594, - "learning_rate": 5.7849495740476625e-06, - "loss": 0.7107, + "epoch": 0.31, + "grad_norm": 1.5701165663393772, + "learning_rate": 8.02352034636394e-06, + "loss": 0.4924, "step": 4432 }, { - "epoch": 0.47, - "grad_norm": 4.412245921640548, - "learning_rate": 5.783266422510211e-06, - "loss": 0.6862, + "epoch": 0.31, + "grad_norm": 1.4352567825167777, + "learning_rate": 8.02260501220652e-06, + "loss": 0.4856, "step": 4433 }, { - "epoch": 0.47, - "grad_norm": 2.1713157933842573, - "learning_rate": 5.781583179975132e-06, - "loss": 0.7024, + "epoch": 0.31, + "grad_norm": 1.516147223360259, + "learning_rate": 8.021689518384861e-06, + "loss": 0.5091, "step": 4434 }, { - "epoch": 0.47, - "grad_norm": 2.5292807136122497, - "learning_rate": 5.779899846637976e-06, - "loss": 0.6344, + "epoch": 0.31, + "grad_norm": 3.545874455876537, + "learning_rate": 8.020773864947322e-06, + "loss": 0.5816, "step": 4435 }, { - "epoch": 0.47, - "grad_norm": 2.638992479723519, - "learning_rate": 5.778216422694312e-06, - "loss": 0.6147, + "epoch": 0.31, + "grad_norm": 0.8606104695082516, + "learning_rate": 8.019858051942272e-06, + "loss": 0.471, "step": 4436 }, { - "epoch": 0.47, - "grad_norm": 2.253511934867257, - "learning_rate": 5.776532908339713e-06, - "loss": 0.6775, + "epoch": 0.31, + "grad_norm": 1.654508153617837, + "learning_rate": 8.018942079418086e-06, + "loss": 0.4604, "step": 4437 }, { - "epoch": 0.47, - "grad_norm": 2.5189103997047404, - "learning_rate": 5.774849303769767e-06, - "loss": 0.6079, + "epoch": 0.31, + "grad_norm": 1.543569895367182, + "learning_rate": 8.018025947423152e-06, + "loss": 0.5101, "step": 4438 }, { - "epoch": 0.47, - "grad_norm": 1.9551608372390805, - "learning_rate": 5.773165609180067e-06, - "loss": 0.6591, + "epoch": 0.32, + "grad_norm": 0.7265870282893965, + "learning_rate": 8.01710965600586e-06, + "loss": 0.4553, "step": 4439 }, { - "epoch": 0.47, - "grad_norm": 2.0567111687412636, - "learning_rate": 5.771481824766222e-06, - "loss": 0.6149, + "epoch": 0.32, + "grad_norm": 1.6121640993008763, + "learning_rate": 8.016193205214611e-06, + "loss": 0.5783, "step": 4440 }, { - "epoch": 0.47, - "grad_norm": 2.063349581374693, - "learning_rate": 5.769797950723848e-06, - "loss": 0.6967, + "epoch": 0.32, + "grad_norm": 1.7533551803530916, + "learning_rate": 8.015276595097819e-06, + "loss": 0.5957, "step": 4441 }, { - "epoch": 0.47, - "grad_norm": 2.1228306731836386, - "learning_rate": 5.7681139872485744e-06, - "loss": 0.6664, + "epoch": 0.32, + "grad_norm": 1.6588017944263151, + "learning_rate": 8.014359825703899e-06, + "loss": 0.5314, "step": 4442 }, { - "epoch": 0.47, - "grad_norm": 2.793644978792627, - "learning_rate": 5.766429934536037e-06, - "loss": 0.5862, + "epoch": 0.32, + "grad_norm": 1.9388030653136485, + "learning_rate": 8.01344289708128e-06, + "loss": 0.5418, "step": 4443 }, { - "epoch": 0.47, - "grad_norm": 1.0595037162948158, - "learning_rate": 5.764745792781886e-06, - "loss": 0.5796, + "epoch": 0.32, + "grad_norm": 0.9020992811052921, + "learning_rate": 8.012525809278395e-06, + "loss": 0.4479, "step": 4444 }, { - "epoch": 0.47, - "grad_norm": 2.6360229263832173, - "learning_rate": 5.763061562181781e-06, - "loss": 0.5961, + "epoch": 0.32, + "grad_norm": 1.9351774861741087, + "learning_rate": 8.01160856234369e-06, + "loss": 0.5352, "step": 4445 }, { - "epoch": 0.47, - "grad_norm": 2.317300668949924, - "learning_rate": 5.761377242931386e-06, - "loss": 0.6888, + "epoch": 0.32, + "grad_norm": 1.4859691769592243, + "learning_rate": 8.010691156325615e-06, + "loss": 0.5188, "step": 4446 }, { - "epoch": 0.47, - "grad_norm": 2.3873429958437904, - "learning_rate": 5.759692835226387e-06, - "loss": 0.6889, + "epoch": 0.32, + "grad_norm": 1.5151457406320967, + "learning_rate": 8.009773591272632e-06, + "loss": 0.5859, "step": 4447 }, { - "epoch": 0.47, - "grad_norm": 2.2136534312552785, - "learning_rate": 5.75800833926247e-06, - "loss": 0.6915, + "epoch": 0.32, + "grad_norm": 1.5703420199436302, + "learning_rate": 8.00885586723321e-06, + "loss": 0.5136, "step": 4448 }, { - "epoch": 0.47, - "grad_norm": 3.0974828764619353, - "learning_rate": 5.756323755235334e-06, - "loss": 0.5588, + "epoch": 0.32, + "grad_norm": 5.396491164384794, + "learning_rate": 8.007937984255825e-06, + "loss": 0.5428, "step": 4449 }, { - "epoch": 0.47, - "grad_norm": 2.4333697468641398, - "learning_rate": 5.754639083340691e-06, - "loss": 0.7895, + "epoch": 0.32, + "grad_norm": 1.6465074732482576, + "learning_rate": 8.007019942388962e-06, + "loss": 0.624, "step": 4450 }, { - "epoch": 0.47, - "grad_norm": 3.480667649075754, - "learning_rate": 5.752954323774261e-06, - "loss": 0.6759, + "epoch": 0.32, + "grad_norm": 1.6029632061940269, + "learning_rate": 8.006101741681118e-06, + "loss": 0.5623, "step": 4451 }, { - "epoch": 0.47, - "grad_norm": 2.26250369737515, - "learning_rate": 5.751269476731775e-06, - "loss": 0.5934, + "epoch": 0.32, + "grad_norm": 2.4565659717098747, + "learning_rate": 8.005183382180794e-06, + "loss": 0.54, "step": 4452 }, { - "epoch": 0.47, - "grad_norm": 1.1003678544450188, - "learning_rate": 5.749584542408971e-06, - "loss": 0.5815, + "epoch": 0.32, + "grad_norm": 1.555647189325073, + "learning_rate": 8.004264863936502e-06, + "loss": 0.5572, "step": 4453 }, { - "epoch": 0.47, - "grad_norm": 2.3078642309343578, - "learning_rate": 5.747899521001603e-06, - "loss": 0.6309, + "epoch": 0.32, + "grad_norm": 1.828365628222904, + "learning_rate": 8.003346186996756e-06, + "loss": 0.6101, "step": 4454 }, { - "epoch": 0.47, - "grad_norm": 0.9538174909326671, - "learning_rate": 5.74621441270543e-06, - "loss": 0.5846, + "epoch": 0.32, + "grad_norm": 1.6901478939052879, + "learning_rate": 8.00242735141009e-06, + "loss": 0.5556, "step": 4455 }, { - "epoch": 0.47, - "grad_norm": 2.3481363612900386, - "learning_rate": 5.744529217716225e-06, - "loss": 0.6627, + "epoch": 0.32, + "grad_norm": 0.8712724233170649, + "learning_rate": 8.00150835722504e-06, + "loss": 0.4466, "step": 4456 }, { - "epoch": 0.47, - "grad_norm": 2.4288667378870494, - "learning_rate": 5.742843936229765e-06, - "loss": 0.6473, + "epoch": 0.32, + "grad_norm": 1.7088807555980134, + "learning_rate": 8.000589204490146e-06, + "loss": 0.57, "step": 4457 }, { - "epoch": 0.47, - "grad_norm": 12.117953706010812, - "learning_rate": 5.741158568441846e-06, - "loss": 0.6602, + "epoch": 0.32, + "grad_norm": 3.2141540731027596, + "learning_rate": 7.999669893253962e-06, + "loss": 0.5314, "step": 4458 }, { - "epoch": 0.47, - "grad_norm": 2.9795673957075657, - "learning_rate": 5.739473114548266e-06, - "loss": 0.6127, + "epoch": 0.32, + "grad_norm": 0.6963331081269266, + "learning_rate": 7.998750423565051e-06, + "loss": 0.4718, "step": 4459 }, { - "epoch": 0.47, - "grad_norm": 2.184917637480841, - "learning_rate": 5.737787574744837e-06, - "loss": 0.7399, + "epoch": 0.32, + "grad_norm": 2.17820570800185, + "learning_rate": 7.997830795471982e-06, + "loss": 0.5976, "step": 4460 }, { - "epoch": 0.47, - "grad_norm": 3.0453232292377943, - "learning_rate": 5.736101949227382e-06, - "loss": 0.6086, + "epoch": 0.32, + "grad_norm": 1.774226659078033, + "learning_rate": 7.996911009023332e-06, + "loss": 0.5779, "step": 4461 }, { - "epoch": 0.47, - "grad_norm": 2.146930741168219, - "learning_rate": 5.734416238191729e-06, - "loss": 0.6511, + "epoch": 0.32, + "grad_norm": 1.9927187723692574, + "learning_rate": 7.995991064267688e-06, + "loss": 0.527, "step": 4462 }, { - "epoch": 0.47, - "grad_norm": 2.0161096829790184, - "learning_rate": 5.73273044183372e-06, - "loss": 0.5465, + "epoch": 0.32, + "grad_norm": 1.7032996985433786, + "learning_rate": 7.995070961253643e-06, + "loss": 0.5751, "step": 4463 }, { - "epoch": 0.47, - "grad_norm": 3.4064385189492246, - "learning_rate": 5.7310445603492085e-06, - "loss": 0.6235, + "epoch": 0.32, + "grad_norm": 1.5149946396537313, + "learning_rate": 7.994150700029802e-06, + "loss": 0.5839, "step": 4464 }, { - "epoch": 0.47, - "grad_norm": 2.1516478687948544, - "learning_rate": 5.729358593934051e-06, - "loss": 0.6553, + "epoch": 0.32, + "grad_norm": 2.4162634144981725, + "learning_rate": 7.993230280644776e-06, + "loss": 0.541, "step": 4465 }, { - "epoch": 0.47, - "grad_norm": 2.013275127090678, - "learning_rate": 5.727672542784122e-06, - "loss": 0.631, + "epoch": 0.32, + "grad_norm": 2.676480270237703, + "learning_rate": 7.992309703147182e-06, + "loss": 0.5292, "step": 4466 }, { - "epoch": 0.47, - "grad_norm": 2.21296220940741, - "learning_rate": 5.7259864070953e-06, - "loss": 0.6722, + "epoch": 0.32, + "grad_norm": 1.479740266899018, + "learning_rate": 7.991388967585651e-06, + "loss": 0.6112, "step": 4467 }, { - "epoch": 0.47, - "grad_norm": 2.0788929579185234, - "learning_rate": 5.724300187063474e-06, - "loss": 0.6972, + "epoch": 0.32, + "grad_norm": 1.7723440645167579, + "learning_rate": 7.99046807400882e-06, + "loss": 0.6242, "step": 4468 }, { - "epoch": 0.47, - "grad_norm": 2.2057902914842415, - "learning_rate": 5.722613882884549e-06, - "loss": 0.6598, + "epoch": 0.32, + "grad_norm": 1.59285037317661, + "learning_rate": 7.989547022465329e-06, + "loss": 0.5629, "step": 4469 }, { - "epoch": 0.47, - "grad_norm": 2.3957705076731886, - "learning_rate": 5.720927494754429e-06, - "loss": 0.6991, + "epoch": 0.32, + "grad_norm": 1.5945130490743458, + "learning_rate": 7.988625813003837e-06, + "loss": 0.5705, "step": 4470 }, { - "epoch": 0.47, - "grad_norm": 10.12211879589646, - "learning_rate": 5.719241022869039e-06, - "loss": 0.6594, + "epoch": 0.32, + "grad_norm": 1.4857965778624243, + "learning_rate": 7.987704445673003e-06, + "loss": 0.5371, "step": 4471 }, { - "epoch": 0.47, - "grad_norm": 3.490904530002642, - "learning_rate": 5.7175544674243044e-06, - "loss": 0.5995, + "epoch": 0.32, + "grad_norm": 2.2266456793826177, + "learning_rate": 7.986782920521494e-06, + "loss": 0.516, "step": 4472 }, { - "epoch": 0.47, - "grad_norm": 5.934704124433464, - "learning_rate": 5.715867828616167e-06, - "loss": 0.6718, + "epoch": 0.32, + "grad_norm": 1.6672977359438765, + "learning_rate": 7.985861237597994e-06, + "loss": 0.5026, "step": 4473 }, { - "epoch": 0.47, - "grad_norm": 2.518726432040352, - "learning_rate": 5.714181106640575e-06, - "loss": 0.7023, + "epoch": 0.32, + "grad_norm": 1.5547223361143907, + "learning_rate": 7.984939396951183e-06, + "loss": 0.5309, "step": 4474 }, { - "epoch": 0.47, - "grad_norm": 2.2024550220706205, - "learning_rate": 5.712494301693486e-06, - "loss": 0.6435, + "epoch": 0.32, + "grad_norm": 1.5197904351312341, + "learning_rate": 7.98401739862976e-06, + "loss": 0.5885, "step": 4475 }, { - "epoch": 0.47, - "grad_norm": 4.867057001543077, - "learning_rate": 5.710807413970868e-06, - "loss": 0.6585, + "epoch": 0.32, + "grad_norm": 0.9498977692662619, + "learning_rate": 7.983095242682427e-06, + "loss": 0.4787, "step": 4476 }, { - "epoch": 0.47, - "grad_norm": 2.3892252500285447, - "learning_rate": 5.709120443668701e-06, - "loss": 0.6105, + "epoch": 0.32, + "grad_norm": 0.8689202304987352, + "learning_rate": 7.982172929157894e-06, + "loss": 0.454, "step": 4477 }, { - "epoch": 0.47, - "grad_norm": 2.751702391769238, - "learning_rate": 5.707433390982969e-06, - "loss": 0.6801, + "epoch": 0.32, + "grad_norm": 1.6338223805969365, + "learning_rate": 7.981250458104884e-06, + "loss": 0.5952, "step": 4478 }, { - "epoch": 0.47, - "grad_norm": 2.2281246093897633, - "learning_rate": 5.705746256109671e-06, - "loss": 0.6254, + "epoch": 0.32, + "grad_norm": 1.7749455954973212, + "learning_rate": 7.980327829572121e-06, + "loss": 0.5794, "step": 4479 }, { - "epoch": 0.47, - "grad_norm": 3.2065787831492654, - "learning_rate": 5.704059039244814e-06, - "loss": 0.6247, + "epoch": 0.32, + "grad_norm": 1.6229458195489774, + "learning_rate": 7.979405043608343e-06, + "loss": 0.6236, "step": 4480 }, { - "epoch": 0.47, - "grad_norm": 3.4211374534691665, - "learning_rate": 5.7023717405844114e-06, - "loss": 0.6601, + "epoch": 0.32, + "grad_norm": 1.6329162251794298, + "learning_rate": 7.978482100262296e-06, + "loss": 0.6409, "step": 4481 }, { - "epoch": 0.47, - "grad_norm": 8.97147154570612, - "learning_rate": 5.700684360324492e-06, - "loss": 0.6468, + "epoch": 0.32, + "grad_norm": 2.193643476827921, + "learning_rate": 7.977558999582732e-06, + "loss": 0.5802, "step": 4482 }, { - "epoch": 0.47, - "grad_norm": 5.36936656327254, - "learning_rate": 5.6989968986610876e-06, - "loss": 0.6256, + "epoch": 0.32, + "grad_norm": 1.6749903668566997, + "learning_rate": 7.976635741618413e-06, + "loss": 0.5301, "step": 4483 }, { - "epoch": 0.47, - "grad_norm": 2.6783669889814394, - "learning_rate": 5.697309355790246e-06, - "loss": 0.7214, + "epoch": 0.32, + "grad_norm": 1.6615565346152998, + "learning_rate": 7.975712326418105e-06, + "loss": 0.5751, "step": 4484 }, { - "epoch": 0.47, - "grad_norm": 2.3024950794881716, - "learning_rate": 5.695621731908018e-06, - "loss": 0.6098, + "epoch": 0.32, + "grad_norm": 1.62038807559947, + "learning_rate": 7.974788754030594e-06, + "loss": 0.6057, "step": 4485 }, { - "epoch": 0.47, - "grad_norm": 2.2375276123528454, - "learning_rate": 5.693934027210468e-06, - "loss": 0.6077, + "epoch": 0.32, + "grad_norm": 1.484562265849183, + "learning_rate": 7.973865024504657e-06, + "loss": 0.5687, "step": 4486 }, { - "epoch": 0.47, - "grad_norm": 2.3754390849560383, - "learning_rate": 5.692246241893669e-06, - "loss": 0.6369, + "epoch": 0.32, + "grad_norm": 1.6132801034854432, + "learning_rate": 7.972941137889091e-06, + "loss": 0.5501, "step": 4487 }, { - "epoch": 0.47, - "grad_norm": 2.1211748838319964, - "learning_rate": 5.6905583761537034e-06, - "loss": 0.6388, + "epoch": 0.32, + "grad_norm": 1.6833034714854715, + "learning_rate": 7.972017094232704e-06, + "loss": 0.5743, "step": 4488 }, { - "epoch": 0.47, - "grad_norm": 3.868019959818687, - "learning_rate": 5.68887043018666e-06, - "loss": 0.6607, + "epoch": 0.32, + "grad_norm": 1.7706967026271705, + "learning_rate": 7.971092893584303e-06, + "loss": 0.6035, "step": 4489 }, { - "epoch": 0.47, - "grad_norm": 2.33570658417535, - "learning_rate": 5.687182404188642e-06, - "loss": 0.6159, + "epoch": 0.32, + "grad_norm": 1.7192143859114, + "learning_rate": 7.970168535992708e-06, + "loss": 0.568, "step": 4490 }, { - "epoch": 0.47, - "grad_norm": 2.8823599155269486, - "learning_rate": 5.68549429835576e-06, - "loss": 0.535, + "epoch": 0.32, + "grad_norm": 1.6111346607601964, + "learning_rate": 7.969244021506746e-06, + "loss": 0.5628, "step": 4491 }, { - "epoch": 0.47, - "grad_norm": 2.743721547670784, - "learning_rate": 5.6838061128841294e-06, - "loss": 0.6538, + "epoch": 0.32, + "grad_norm": 1.7837576470298593, + "learning_rate": 7.968319350175253e-06, + "loss": 0.5817, "step": 4492 }, { - "epoch": 0.47, - "grad_norm": 3.8002300637615645, - "learning_rate": 5.682117847969884e-06, - "loss": 0.6659, + "epoch": 0.32, + "grad_norm": 2.3521576851212807, + "learning_rate": 7.967394522047074e-06, + "loss": 0.5707, "step": 4493 }, { - "epoch": 0.47, - "grad_norm": 1.0584871886426965, - "learning_rate": 5.680429503809157e-06, - "loss": 0.5985, + "epoch": 0.32, + "grad_norm": 1.5679528794355437, + "learning_rate": 7.966469537171062e-06, + "loss": 0.581, "step": 4494 }, { - "epoch": 0.47, - "grad_norm": 3.525503447996165, - "learning_rate": 5.678741080598098e-06, - "loss": 0.5908, + "epoch": 0.32, + "grad_norm": 1.4696717201102523, + "learning_rate": 7.965544395596078e-06, + "loss": 0.5348, "step": 4495 }, { - "epoch": 0.47, - "grad_norm": 2.687907042607034, - "learning_rate": 5.6770525785328625e-06, - "loss": 0.6845, + "epoch": 0.32, + "grad_norm": 1.9174997555897697, + "learning_rate": 7.96461909737099e-06, + "loss": 0.5224, "step": 4496 }, { - "epoch": 0.47, - "grad_norm": 2.304902889828247, - "learning_rate": 5.675363997809616e-06, - "loss": 0.6827, + "epoch": 0.32, + "grad_norm": 1.654877502960659, + "learning_rate": 7.963693642544674e-06, + "loss": 0.5754, "step": 4497 }, { - "epoch": 0.47, - "grad_norm": 2.4406252620899282, - "learning_rate": 5.6736753386245315e-06, - "loss": 0.5224, + "epoch": 0.32, + "grad_norm": 1.5030211064366676, + "learning_rate": 7.96276803116602e-06, + "loss": 0.6039, "step": 4498 }, { - "epoch": 0.47, - "grad_norm": 2.5325199094461035, - "learning_rate": 5.6719866011737934e-06, - "loss": 0.6689, + "epoch": 0.32, + "grad_norm": 1.4837898793545214, + "learning_rate": 7.961842263283917e-06, + "loss": 0.567, "step": 4499 }, { - "epoch": 0.47, - "grad_norm": 2.375682344749398, - "learning_rate": 5.670297785653596e-06, - "loss": 0.6651, + "epoch": 0.32, + "grad_norm": 2.123706498826837, + "learning_rate": 7.960916338947272e-06, + "loss": 0.5803, "step": 4500 }, { - "epoch": 0.47, - "grad_norm": 2.425658142737432, - "learning_rate": 5.668608892260138e-06, - "loss": 0.7422, + "epoch": 0.32, + "grad_norm": 1.5059772850905464, + "learning_rate": 7.959990258204991e-06, + "loss": 0.5078, "step": 4501 }, { - "epoch": 0.47, - "grad_norm": 2.0617808401615054, - "learning_rate": 5.666919921189632e-06, - "loss": 0.6425, + "epoch": 0.32, + "grad_norm": 0.779109182585933, + "learning_rate": 7.959064021105996e-06, + "loss": 0.4295, "step": 4502 }, { - "epoch": 0.47, - "grad_norm": 2.272673910971418, - "learning_rate": 5.665230872638297e-06, - "loss": 0.648, + "epoch": 0.32, + "grad_norm": 1.847811338926398, + "learning_rate": 7.958137627699212e-06, + "loss": 0.5925, "step": 4503 }, { - "epoch": 0.47, - "grad_norm": 2.572982484112035, - "learning_rate": 5.6635417468023635e-06, - "loss": 0.6951, + "epoch": 0.32, + "grad_norm": 1.9636184946817203, + "learning_rate": 7.957211078033575e-06, + "loss": 0.5919, "step": 4504 }, { - "epoch": 0.47, - "grad_norm": 3.0084581361854075, - "learning_rate": 5.661852543878067e-06, - "loss": 0.6481, + "epoch": 0.32, + "grad_norm": 1.6577629756275731, + "learning_rate": 7.956284372158028e-06, + "loss": 0.6655, "step": 4505 }, { - "epoch": 0.47, - "grad_norm": 2.0157127811635567, - "learning_rate": 5.660163264061656e-06, - "loss": 0.585, + "epoch": 0.32, + "grad_norm": 1.5846649385824587, + "learning_rate": 7.955357510121524e-06, + "loss": 0.4945, "step": 4506 }, { - "epoch": 0.47, - "grad_norm": 2.5432077638654644, - "learning_rate": 5.6584739075493835e-06, - "loss": 0.6619, + "epoch": 0.32, + "grad_norm": 1.8124067855794213, + "learning_rate": 7.954430491973023e-06, + "loss": 0.5129, "step": 4507 }, { - "epoch": 0.47, - "grad_norm": 2.5133640279691036, - "learning_rate": 5.656784474537518e-06, - "loss": 0.612, + "epoch": 0.32, + "grad_norm": 2.016856522753694, + "learning_rate": 7.95350331776149e-06, + "loss": 0.5098, "step": 4508 }, { - "epoch": 0.47, - "grad_norm": 2.9315990055007775, - "learning_rate": 5.65509496522233e-06, - "loss": 0.6136, + "epoch": 0.32, + "grad_norm": 1.6177259720635517, + "learning_rate": 7.952575987535907e-06, + "loss": 0.5827, "step": 4509 }, { - "epoch": 0.47, - "grad_norm": 3.2810358608137027, - "learning_rate": 5.653405379800102e-06, - "loss": 0.6993, + "epoch": 0.32, + "grad_norm": 1.6319720356922793, + "learning_rate": 7.951648501345252e-06, + "loss": 0.5377, "step": 4510 }, { - "epoch": 0.47, - "grad_norm": 2.493229989865747, - "learning_rate": 5.651715718467127e-06, - "loss": 0.657, + "epoch": 0.32, + "grad_norm": 1.478789518858757, + "learning_rate": 7.950720859238526e-06, + "loss": 0.5319, "step": 4511 }, { - "epoch": 0.47, - "grad_norm": 2.6802549573204453, - "learning_rate": 5.6500259814197025e-06, - "loss": 0.6624, + "epoch": 0.32, + "grad_norm": 0.8025768120310639, + "learning_rate": 7.949793061264723e-06, + "loss": 0.426, "step": 4512 }, { - "epoch": 0.47, - "grad_norm": 2.625253414667082, - "learning_rate": 5.648336168854139e-06, - "loss": 0.6646, + "epoch": 0.32, + "grad_norm": 1.9550486707947519, + "learning_rate": 7.948865107472857e-06, + "loss": 0.592, "step": 4513 }, { - "epoch": 0.48, - "grad_norm": 2.3868204390514567, - "learning_rate": 5.646646280966755e-06, - "loss": 0.6926, + "epoch": 0.32, + "grad_norm": 1.4653801111503884, + "learning_rate": 7.94793699791194e-06, + "loss": 0.5601, "step": 4514 }, { - "epoch": 0.48, - "grad_norm": 2.4290428676146343, - "learning_rate": 5.6449563179538734e-06, - "loss": 0.5732, + "epoch": 0.32, + "grad_norm": 2.0203183975002426, + "learning_rate": 7.947008732631006e-06, + "loss": 0.5809, "step": 4515 }, { - "epoch": 0.48, - "grad_norm": 2.6813004334794983, - "learning_rate": 5.64326628001183e-06, - "loss": 0.6179, + "epoch": 0.32, + "grad_norm": 1.655132457114526, + "learning_rate": 7.946080311679084e-06, + "loss": 0.4701, "step": 4516 }, { - "epoch": 0.48, - "grad_norm": 2.2908656332176767, - "learning_rate": 5.641576167336972e-06, - "loss": 0.7049, + "epoch": 0.32, + "grad_norm": 1.822212767802605, + "learning_rate": 7.945151735105215e-06, + "loss": 0.578, "step": 4517 }, { - "epoch": 0.48, - "grad_norm": 2.5593906258069583, - "learning_rate": 5.639885980125649e-06, - "loss": 0.5926, + "epoch": 0.32, + "grad_norm": 1.589070411424292, + "learning_rate": 7.944223002958453e-06, + "loss": 0.6412, "step": 4518 }, { - "epoch": 0.48, - "grad_norm": 2.7153221598085593, - "learning_rate": 5.638195718574222e-06, - "loss": 0.7009, + "epoch": 0.32, + "grad_norm": 2.1119884663302275, + "learning_rate": 7.943294115287856e-06, + "loss": 0.5859, "step": 4519 }, { - "epoch": 0.48, - "grad_norm": 2.570454238663235, - "learning_rate": 5.636505382879061e-06, - "loss": 0.6327, + "epoch": 0.32, + "grad_norm": 1.8199709617786453, + "learning_rate": 7.94236507214249e-06, + "loss": 0.5654, "step": 4520 }, { - "epoch": 0.48, - "grad_norm": 2.8531174704597646, - "learning_rate": 5.6348149732365465e-06, - "loss": 0.6287, + "epoch": 0.32, + "grad_norm": 1.9863992137782003, + "learning_rate": 7.941435873571432e-06, + "loss": 0.628, "step": 4521 }, { - "epoch": 0.48, - "grad_norm": 6.518867643695685, - "learning_rate": 5.633124489843063e-06, - "loss": 0.6584, + "epoch": 0.32, + "grad_norm": 1.5501456685426465, + "learning_rate": 7.940506519623765e-06, + "loss": 0.5113, "step": 4522 }, { - "epoch": 0.48, - "grad_norm": 3.940669081934954, - "learning_rate": 5.631433932895005e-06, - "loss": 0.7226, + "epoch": 0.32, + "grad_norm": 1.5485640364553208, + "learning_rate": 7.939577010348577e-06, + "loss": 0.598, "step": 4523 }, { - "epoch": 0.48, - "grad_norm": 2.262500330185508, - "learning_rate": 5.62974330258878e-06, - "loss": 0.5943, + "epoch": 0.32, + "grad_norm": 1.692670723414335, + "learning_rate": 7.938647345794972e-06, + "loss": 0.5609, "step": 4524 }, { - "epoch": 0.48, - "grad_norm": 2.668293506438283, - "learning_rate": 5.6280525991207954e-06, - "loss": 0.6074, + "epoch": 0.32, + "grad_norm": 1.792265035856386, + "learning_rate": 7.937717526012058e-06, + "loss": 0.5915, "step": 4525 }, { - "epoch": 0.48, - "grad_norm": 2.5241175283242465, - "learning_rate": 5.626361822687478e-06, - "loss": 0.6424, + "epoch": 0.32, + "grad_norm": 1.602092458893521, + "learning_rate": 7.936787551048949e-06, + "loss": 0.4977, "step": 4526 }, { - "epoch": 0.48, - "grad_norm": 2.3985700650396047, - "learning_rate": 5.6246709734852535e-06, - "loss": 0.6714, + "epoch": 0.32, + "grad_norm": 1.4681985428946687, + "learning_rate": 7.935857420954769e-06, + "loss": 0.6056, "step": 4527 }, { - "epoch": 0.48, - "grad_norm": 2.762272787782166, - "learning_rate": 5.6229800517105615e-06, - "loss": 0.7199, + "epoch": 0.32, + "grad_norm": 1.7377218132451826, + "learning_rate": 7.934927135778654e-06, + "loss": 0.6101, "step": 4528 }, { - "epoch": 0.48, - "grad_norm": 3.74146694769342, - "learning_rate": 5.621289057559847e-06, - "loss": 0.6983, + "epoch": 0.32, + "grad_norm": 1.875245943356162, + "learning_rate": 7.93399669556974e-06, + "loss": 0.5279, "step": 4529 }, { - "epoch": 0.48, - "grad_norm": 2.5748292058521938, - "learning_rate": 5.619597991229566e-06, - "loss": 0.6199, + "epoch": 0.32, + "grad_norm": 1.6336648725416023, + "learning_rate": 7.933066100377183e-06, + "loss": 0.5212, "step": 4530 }, { - "epoch": 0.48, - "grad_norm": 2.9437147149445138, - "learning_rate": 5.617906852916183e-06, - "loss": 0.6011, + "epoch": 0.32, + "grad_norm": 1.7624653517696096, + "learning_rate": 7.932135350250132e-06, + "loss": 0.5701, "step": 4531 }, { - "epoch": 0.48, - "grad_norm": 23.92235581251259, - "learning_rate": 5.6162156428161665e-06, - "loss": 0.6263, + "epoch": 0.32, + "grad_norm": 1.7437186628510541, + "learning_rate": 7.931204445237758e-06, + "loss": 0.5833, "step": 4532 }, { - "epoch": 0.48, - "grad_norm": 3.087366386179771, - "learning_rate": 5.614524361125998e-06, - "loss": 0.5655, + "epoch": 0.32, + "grad_norm": 1.3818810625459443, + "learning_rate": 7.930273385389234e-06, + "loss": 0.5409, "step": 4533 }, { - "epoch": 0.48, - "grad_norm": 3.503816305177784, - "learning_rate": 5.612833008042166e-06, - "loss": 0.6989, + "epoch": 0.32, + "grad_norm": 0.8948879882738449, + "learning_rate": 7.92934217075374e-06, + "loss": 0.4343, "step": 4534 }, { - "epoch": 0.48, - "grad_norm": 2.0399171517394388, - "learning_rate": 5.611141583761167e-06, - "loss": 0.5931, + "epoch": 0.32, + "grad_norm": 1.6940430377133815, + "learning_rate": 7.928410801380466e-06, + "loss": 0.6134, "step": 4535 }, { - "epoch": 0.48, - "grad_norm": 2.4370559310970306, - "learning_rate": 5.609450088479506e-06, - "loss": 0.6178, + "epoch": 0.32, + "grad_norm": 1.9890566100859006, + "learning_rate": 7.92747927731861e-06, + "loss": 0.5464, "step": 4536 }, { - "epoch": 0.48, - "grad_norm": 2.033431572385822, - "learning_rate": 5.607758522393693e-06, - "loss": 0.5607, + "epoch": 0.32, + "grad_norm": 1.7230605041486837, + "learning_rate": 7.92654759861738e-06, + "loss": 0.5584, "step": 4537 }, { - "epoch": 0.48, - "grad_norm": 6.41604755439263, - "learning_rate": 5.6060668857002545e-06, - "loss": 0.6314, + "epoch": 0.32, + "grad_norm": 0.7690049472874627, + "learning_rate": 7.92561576532599e-06, + "loss": 0.4479, "step": 4538 }, { - "epoch": 0.48, - "grad_norm": 2.3686567490270187, - "learning_rate": 5.604375178595715e-06, - "loss": 0.6557, + "epoch": 0.32, + "grad_norm": 1.6070010054957473, + "learning_rate": 7.92468377749366e-06, + "loss": 0.6161, "step": 4539 }, { - "epoch": 0.48, - "grad_norm": 2.1562680126957847, - "learning_rate": 5.6026834012766155e-06, - "loss": 0.6312, + "epoch": 0.32, + "grad_norm": 1.6997033101963424, + "learning_rate": 7.923751635169624e-06, + "loss": 0.5192, "step": 4540 }, { - "epoch": 0.48, - "grad_norm": 2.498322304260171, - "learning_rate": 5.600991553939501e-06, - "loss": 0.7011, + "epoch": 0.32, + "grad_norm": 1.506006489709928, + "learning_rate": 7.92281933840312e-06, + "loss": 0.5126, "step": 4541 }, { - "epoch": 0.48, - "grad_norm": 2.7681759119595353, - "learning_rate": 5.5992996367809236e-06, - "loss": 0.6749, + "epoch": 0.32, + "grad_norm": 1.5886827403886812, + "learning_rate": 7.921886887243393e-06, + "loss": 0.5293, "step": 4542 }, { - "epoch": 0.48, - "grad_norm": 3.1869566753671816, - "learning_rate": 5.597607649997449e-06, - "loss": 0.6855, + "epoch": 0.32, + "grad_norm": 1.5581380097440565, + "learning_rate": 7.920954281739701e-06, + "loss": 0.5345, "step": 4543 }, { - "epoch": 0.48, - "grad_norm": 2.3926645915251172, - "learning_rate": 5.595915593785644e-06, - "loss": 0.676, + "epoch": 0.32, + "grad_norm": 2.263067295143041, + "learning_rate": 7.920021521941305e-06, + "loss": 0.5491, "step": 4544 }, { - "epoch": 0.48, - "grad_norm": 2.7435380969095537, - "learning_rate": 5.594223468342087e-06, - "loss": 0.5655, + "epoch": 0.32, + "grad_norm": 1.6491038997250576, + "learning_rate": 7.91908860789748e-06, + "loss": 0.579, "step": 4545 }, { - "epoch": 0.48, - "grad_norm": 2.0698113787263352, - "learning_rate": 5.592531273863367e-06, - "loss": 0.5551, + "epoch": 0.32, + "grad_norm": 1.911170818628138, + "learning_rate": 7.918155539657503e-06, + "loss": 0.6028, "step": 4546 }, { - "epoch": 0.48, - "grad_norm": 2.66821684730331, - "learning_rate": 5.590839010546074e-06, - "loss": 0.6269, + "epoch": 0.32, + "grad_norm": 1.6743111864688445, + "learning_rate": 7.917222317270662e-06, + "loss": 0.6399, "step": 4547 }, { - "epoch": 0.48, - "grad_norm": 2.6735786158961927, - "learning_rate": 5.589146678586814e-06, - "loss": 0.5896, + "epoch": 0.32, + "grad_norm": 1.4539380265204296, + "learning_rate": 7.916288940786253e-06, + "loss": 0.4895, "step": 4548 }, { - "epoch": 0.48, - "grad_norm": 2.556035989864172, - "learning_rate": 5.587454278182196e-06, - "loss": 0.7271, + "epoch": 0.32, + "grad_norm": 1.80338165281254, + "learning_rate": 7.915355410253582e-06, + "loss": 0.5568, "step": 4549 }, { - "epoch": 0.48, - "grad_norm": 2.5030779108925056, - "learning_rate": 5.585761809528839e-06, - "loss": 0.5887, + "epoch": 0.32, + "grad_norm": 2.6066257252539202, + "learning_rate": 7.914421725721957e-06, + "loss": 0.621, "step": 4550 }, { - "epoch": 0.48, - "grad_norm": 2.6285299123478363, - "learning_rate": 5.584069272823367e-06, - "loss": 0.6228, + "epoch": 0.32, + "grad_norm": 2.094983957310518, + "learning_rate": 7.913487887240703e-06, + "loss": 0.5799, "step": 4551 }, { - "epoch": 0.48, - "grad_norm": 4.742738953366397, - "learning_rate": 5.582376668262415e-06, - "loss": 0.5916, + "epoch": 0.32, + "grad_norm": 0.8670385040078306, + "learning_rate": 7.912553894859146e-06, + "loss": 0.4763, "step": 4552 }, { - "epoch": 0.48, - "grad_norm": 2.051227649046525, - "learning_rate": 5.580683996042625e-06, - "loss": 0.5793, + "epoch": 0.32, + "grad_norm": 1.750342217783563, + "learning_rate": 7.911619748626622e-06, + "loss": 0.4741, "step": 4553 }, { - "epoch": 0.48, - "grad_norm": 2.683953068169987, - "learning_rate": 5.578991256360649e-06, - "loss": 0.6996, + "epoch": 0.32, + "grad_norm": 5.999353517009271, + "learning_rate": 7.910685448592478e-06, + "loss": 0.6589, "step": 4554 }, { - "epoch": 0.48, - "grad_norm": 2.4168596403111824, - "learning_rate": 5.577298449413141e-06, - "loss": 0.59, + "epoch": 0.32, + "grad_norm": 1.5034931748475453, + "learning_rate": 7.909750994806066e-06, + "loss": 0.5266, "step": 4555 }, { - "epoch": 0.48, - "grad_norm": 2.5891727673576144, - "learning_rate": 5.575605575396767e-06, - "loss": 0.635, + "epoch": 0.32, + "grad_norm": 1.7745994152209272, + "learning_rate": 7.908816387316747e-06, + "loss": 0.5339, "step": 4556 }, { - "epoch": 0.48, - "grad_norm": 3.164368406974795, - "learning_rate": 5.573912634508203e-06, - "loss": 0.6719, + "epoch": 0.32, + "grad_norm": 0.6888961189850827, + "learning_rate": 7.90788162617389e-06, + "loss": 0.459, "step": 4557 }, { - "epoch": 0.48, - "grad_norm": 2.1467160524829803, - "learning_rate": 5.572219626944128e-06, - "loss": 0.6302, + "epoch": 0.32, + "grad_norm": 1.4458786749830184, + "learning_rate": 7.906946711426873e-06, + "loss": 0.5795, "step": 4558 }, { - "epoch": 0.48, - "grad_norm": 2.9545578011130686, - "learning_rate": 5.5705265529012295e-06, - "loss": 0.6129, + "epoch": 0.32, + "grad_norm": 0.8468161259941396, + "learning_rate": 7.906011643125078e-06, + "loss": 0.4682, "step": 4559 }, { - "epoch": 0.48, - "grad_norm": 2.397337716611551, - "learning_rate": 5.5688334125762065e-06, - "loss": 0.577, + "epoch": 0.32, + "grad_norm": 2.089786054052781, + "learning_rate": 7.905076421317904e-06, + "loss": 0.5373, "step": 4560 }, { - "epoch": 0.48, - "grad_norm": 2.5141297243805707, - "learning_rate": 5.567140206165762e-06, - "loss": 0.6222, + "epoch": 0.32, + "grad_norm": 1.730184182601446, + "learning_rate": 7.904141046054747e-06, + "loss": 0.5386, "step": 4561 }, { - "epoch": 0.48, - "grad_norm": 2.5018217036974577, - "learning_rate": 5.565446933866607e-06, - "loss": 0.6108, + "epoch": 0.32, + "grad_norm": 1.5609528597825848, + "learning_rate": 7.903205517385022e-06, + "loss": 0.5828, "step": 4562 }, { - "epoch": 0.48, - "grad_norm": 2.280655074209046, - "learning_rate": 5.563753595875463e-06, - "loss": 0.6282, + "epoch": 0.32, + "grad_norm": 2.083619669158488, + "learning_rate": 7.902269835358142e-06, + "loss": 0.5621, "step": 4563 }, { - "epoch": 0.48, - "grad_norm": 6.359831147248012, - "learning_rate": 5.562060192389054e-06, - "loss": 0.6622, + "epoch": 0.32, + "grad_norm": 1.5878764924991424, + "learning_rate": 7.901334000023537e-06, + "loss": 0.5991, "step": 4564 }, { - "epoch": 0.48, - "grad_norm": 2.2107889624685333, - "learning_rate": 5.560366723604117e-06, - "loss": 0.6035, + "epoch": 0.32, + "grad_norm": 1.6870373862434531, + "learning_rate": 7.900398011430638e-06, + "loss": 0.6115, "step": 4565 }, { - "epoch": 0.48, - "grad_norm": 2.5994180305445815, - "learning_rate": 5.558673189717395e-06, - "loss": 0.6777, + "epoch": 0.32, + "grad_norm": 1.9518547947943425, + "learning_rate": 7.899461869628889e-06, + "loss": 0.5782, "step": 4566 }, { - "epoch": 0.48, - "grad_norm": 2.7545314951405593, - "learning_rate": 5.556979590925636e-06, - "loss": 0.5888, + "epoch": 0.32, + "grad_norm": 1.5565734679743028, + "learning_rate": 7.898525574667737e-06, + "loss": 0.5346, "step": 4567 }, { - "epoch": 0.48, - "grad_norm": 2.364332000424429, - "learning_rate": 5.555285927425599e-06, - "loss": 0.6485, + "epoch": 0.32, + "grad_norm": 1.4811358773251708, + "learning_rate": 7.897589126596645e-06, + "loss": 0.5248, "step": 4568 }, { - "epoch": 0.48, - "grad_norm": 2.5857119479401773, - "learning_rate": 5.553592199414047e-06, - "loss": 0.6616, + "epoch": 0.32, + "grad_norm": 0.9315930240557068, + "learning_rate": 7.896652525465077e-06, + "loss": 0.4686, "step": 4569 }, { - "epoch": 0.48, - "grad_norm": 3.15710865319317, - "learning_rate": 5.551898407087754e-06, - "loss": 0.6485, + "epoch": 0.32, + "grad_norm": 2.154826606486221, + "learning_rate": 7.895715771322505e-06, + "loss": 0.5661, "step": 4570 }, { - "epoch": 0.48, - "grad_norm": 2.8154040551745236, - "learning_rate": 5.550204550643501e-06, - "loss": 0.6448, + "epoch": 0.32, + "grad_norm": 1.757907272582158, + "learning_rate": 7.894778864218415e-06, + "loss": 0.6543, "step": 4571 }, { - "epoch": 0.48, - "grad_norm": 2.8762614479514133, - "learning_rate": 5.548510630278073e-06, - "loss": 0.5749, + "epoch": 0.32, + "grad_norm": 0.7945292097449744, + "learning_rate": 7.893841804202299e-06, + "loss": 0.4514, "step": 4572 }, { - "epoch": 0.48, - "grad_norm": 3.430020443992809, - "learning_rate": 5.5468166461882645e-06, - "loss": 0.694, + "epoch": 0.32, + "grad_norm": 1.6825525465514926, + "learning_rate": 7.892904591323651e-06, + "loss": 0.5476, "step": 4573 }, { - "epoch": 0.48, - "grad_norm": 2.010935891713146, - "learning_rate": 5.545122598570879e-06, - "loss": 0.5862, + "epoch": 0.32, + "grad_norm": 4.269125601171483, + "learning_rate": 7.891967225631979e-06, + "loss": 0.5375, "step": 4574 }, { - "epoch": 0.48, - "grad_norm": 2.755230997400163, - "learning_rate": 5.543428487622727e-06, - "loss": 0.6561, + "epoch": 0.32, + "grad_norm": 1.8069818987147572, + "learning_rate": 7.8910297071768e-06, + "loss": 0.5343, "step": 4575 }, { - "epoch": 0.48, - "grad_norm": 4.059923185258385, - "learning_rate": 5.5417343135406206e-06, - "loss": 0.6963, + "epoch": 0.32, + "grad_norm": 1.8032211616448546, + "learning_rate": 7.890092036007636e-06, + "loss": 0.565, "step": 4576 }, { - "epoch": 0.48, - "grad_norm": 4.790230956470769, - "learning_rate": 5.54004007652139e-06, - "loss": 0.5903, + "epoch": 0.32, + "grad_norm": 1.5646533408185028, + "learning_rate": 7.889154212174018e-06, + "loss": 0.5207, "step": 4577 }, { - "epoch": 0.48, - "grad_norm": 2.829573606090716, - "learning_rate": 5.5383457767618655e-06, - "loss": 0.6433, + "epoch": 0.32, + "grad_norm": 2.056784263535567, + "learning_rate": 7.888216235725484e-06, + "loss": 0.6256, "step": 4578 }, { - "epoch": 0.48, - "grad_norm": 3.3802992751931424, - "learning_rate": 5.5366514144588835e-06, - "loss": 0.6504, + "epoch": 0.32, + "grad_norm": 1.5199069206278482, + "learning_rate": 7.887278106711582e-06, + "loss": 0.4808, "step": 4579 }, { - "epoch": 0.48, - "grad_norm": 2.87227842810873, - "learning_rate": 5.534956989809293e-06, - "loss": 0.6567, + "epoch": 0.33, + "grad_norm": 1.7348565057699274, + "learning_rate": 7.886339825181865e-06, + "loss": 0.5661, "step": 4580 }, { - "epoch": 0.48, - "grad_norm": 3.680652735847989, - "learning_rate": 5.533262503009944e-06, - "loss": 0.6889, + "epoch": 0.33, + "grad_norm": 1.3889889460426779, + "learning_rate": 7.885401391185902e-06, + "loss": 0.5195, "step": 4581 }, { - "epoch": 0.48, - "grad_norm": 3.381512231517992, - "learning_rate": 5.5315679542577e-06, - "loss": 0.6544, + "epoch": 0.33, + "grad_norm": 1.7677813706179504, + "learning_rate": 7.884462804773259e-06, + "loss": 0.5004, "step": 4582 }, { - "epoch": 0.48, - "grad_norm": 2.546877424221789, - "learning_rate": 5.529873343749428e-06, - "loss": 0.6369, + "epoch": 0.33, + "grad_norm": 1.7372542242985938, + "learning_rate": 7.883524065993515e-06, + "loss": 0.5755, "step": 4583 }, { - "epoch": 0.48, - "grad_norm": 3.319093075019239, - "learning_rate": 5.528178671682002e-06, - "loss": 0.5969, + "epoch": 0.33, + "grad_norm": 3.3901428963683937, + "learning_rate": 7.882585174896261e-06, + "loss": 0.5182, "step": 4584 }, { - "epoch": 0.48, - "grad_norm": 2.453302660315127, - "learning_rate": 5.5264839382523035e-06, - "loss": 0.5925, + "epoch": 0.33, + "grad_norm": 5.383043571236327, + "learning_rate": 7.88164613153109e-06, + "loss": 0.578, "step": 4585 }, { - "epoch": 0.48, - "grad_norm": 2.3352208256933125, - "learning_rate": 5.524789143657226e-06, - "loss": 0.6313, + "epoch": 0.33, + "grad_norm": 1.5333468773117762, + "learning_rate": 7.880706935947605e-06, + "loss": 0.545, "step": 4586 }, { - "epoch": 0.48, - "grad_norm": 3.6803200585589213, - "learning_rate": 5.523094288093659e-06, - "loss": 0.6757, + "epoch": 0.33, + "grad_norm": 1.5583513814657557, + "learning_rate": 7.87976758819542e-06, + "loss": 0.4879, "step": 4587 }, { - "epoch": 0.48, - "grad_norm": 3.4253926993814092, - "learning_rate": 5.521399371758511e-06, - "loss": 0.6969, + "epoch": 0.33, + "grad_norm": 1.6635498370437123, + "learning_rate": 7.878828088324154e-06, + "loss": 0.5659, "step": 4588 }, { - "epoch": 0.48, - "grad_norm": 3.223973921567959, - "learning_rate": 5.519704394848693e-06, - "loss": 0.6356, + "epoch": 0.33, + "grad_norm": 1.4655871037612245, + "learning_rate": 7.877888436383431e-06, + "loss": 0.5789, "step": 4589 }, { - "epoch": 0.48, - "grad_norm": 3.6909496271121074, - "learning_rate": 5.518009357561119e-06, - "loss": 0.6763, + "epoch": 0.33, + "grad_norm": 1.5815802112081216, + "learning_rate": 7.87694863242289e-06, + "loss": 0.6199, "step": 4590 }, { - "epoch": 0.48, - "grad_norm": 2.315901845822254, - "learning_rate": 5.516314260092717e-06, - "loss": 0.6356, + "epoch": 0.33, + "grad_norm": 0.8583607672441397, + "learning_rate": 7.876008676492175e-06, + "loss": 0.4775, "step": 4591 }, { - "epoch": 0.48, - "grad_norm": 2.885139970332389, - "learning_rate": 5.514619102640415e-06, - "loss": 0.6069, + "epoch": 0.33, + "grad_norm": 0.8064417051506307, + "learning_rate": 7.875068568640935e-06, + "loss": 0.4671, "step": 4592 }, { - "epoch": 0.48, - "grad_norm": 2.566257463657484, - "learning_rate": 5.512923885401154e-06, - "loss": 0.6844, + "epoch": 0.33, + "grad_norm": 1.963628589123776, + "learning_rate": 7.874128308918831e-06, + "loss": 0.5512, "step": 4593 }, { - "epoch": 0.48, - "grad_norm": 1.1239709742116073, - "learning_rate": 5.511228608571879e-06, - "loss": 0.5754, + "epoch": 0.33, + "grad_norm": 1.8399619705050907, + "learning_rate": 7.873187897375531e-06, + "loss": 0.5428, "step": 4594 }, { - "epoch": 0.48, - "grad_norm": 3.1944023152638357, - "learning_rate": 5.5095332723495425e-06, - "loss": 0.6047, + "epoch": 0.33, + "grad_norm": 1.6640215094157382, + "learning_rate": 7.872247334060712e-06, + "loss": 0.5871, "step": 4595 }, { - "epoch": 0.48, - "grad_norm": 3.0417150262269077, - "learning_rate": 5.507837876931102e-06, - "loss": 0.6709, + "epoch": 0.33, + "grad_norm": 1.7290593151739144, + "learning_rate": 7.871306619024055e-06, + "loss": 0.6148, "step": 4596 }, { - "epoch": 0.48, - "grad_norm": 3.1835449500447512, - "learning_rate": 5.506142422513525e-06, - "loss": 0.6766, + "epoch": 0.33, + "grad_norm": 1.4707844127066123, + "learning_rate": 7.870365752315252e-06, + "loss": 0.5147, "step": 4597 }, { - "epoch": 0.48, - "grad_norm": 2.4531359904321675, - "learning_rate": 5.504446909293786e-06, - "loss": 0.6872, + "epoch": 0.33, + "grad_norm": 1.5874864111277935, + "learning_rate": 7.869424733984009e-06, + "loss": 0.5896, "step": 4598 }, { - "epoch": 0.48, - "grad_norm": 2.9575039883577126, - "learning_rate": 5.502751337468862e-06, - "loss": 0.6198, + "epoch": 0.33, + "grad_norm": 1.9431718931373823, + "learning_rate": 7.868483564080022e-06, + "loss": 0.5378, "step": 4599 }, { - "epoch": 0.48, - "grad_norm": 3.254352953269388, - "learning_rate": 5.5010557072357395e-06, - "loss": 0.7175, + "epoch": 0.33, + "grad_norm": 1.48801425124575, + "learning_rate": 7.867542242653017e-06, + "loss": 0.5572, "step": 4600 }, { - "epoch": 0.48, - "grad_norm": 2.5317638836045275, - "learning_rate": 5.499360018791416e-06, - "loss": 0.626, + "epoch": 0.33, + "grad_norm": 1.4218421316550778, + "learning_rate": 7.866600769752714e-06, + "loss": 0.5256, "step": 4601 }, { - "epoch": 0.48, - "grad_norm": 3.080250323596298, - "learning_rate": 5.497664272332888e-06, - "loss": 0.6564, + "epoch": 0.33, + "grad_norm": 1.7015423958575833, + "learning_rate": 7.865659145428843e-06, + "loss": 0.5796, "step": 4602 }, { - "epoch": 0.48, - "grad_norm": 4.791033529732766, - "learning_rate": 5.495968468057164e-06, - "loss": 0.6691, + "epoch": 0.33, + "grad_norm": 2.1177174559776963, + "learning_rate": 7.864717369731148e-06, + "loss": 0.5535, "step": 4603 }, { - "epoch": 0.48, - "grad_norm": 3.576002951182326, - "learning_rate": 5.4942726061612564e-06, - "loss": 0.7081, + "epoch": 0.33, + "grad_norm": 1.557064133080552, + "learning_rate": 7.863775442709374e-06, + "loss": 0.5607, "step": 4604 }, { - "epoch": 0.48, - "grad_norm": 6.002512038489308, - "learning_rate": 5.492576686842186e-06, - "loss": 0.5352, + "epoch": 0.33, + "grad_norm": 1.532061875173884, + "learning_rate": 7.862833364413277e-06, + "loss": 0.5196, "step": 4605 }, { - "epoch": 0.48, - "grad_norm": 3.3897993264643347, - "learning_rate": 5.49088071029698e-06, - "loss": 0.6308, + "epoch": 0.33, + "grad_norm": 1.75511028043191, + "learning_rate": 7.861891134892621e-06, + "loss": 0.5585, "step": 4606 }, { - "epoch": 0.48, - "grad_norm": 2.409849318201643, - "learning_rate": 5.489184676722673e-06, - "loss": 0.6008, + "epoch": 0.33, + "grad_norm": 1.631777713071407, + "learning_rate": 7.860948754197178e-06, + "loss": 0.5916, "step": 4607 }, { - "epoch": 0.48, - "grad_norm": 2.5209078919290677, - "learning_rate": 5.487488586316304e-06, - "loss": 0.609, + "epoch": 0.33, + "grad_norm": 1.4974051628079441, + "learning_rate": 7.860006222376729e-06, + "loss": 0.5338, "step": 4608 }, { - "epoch": 0.49, - "grad_norm": 2.3412970675636466, - "learning_rate": 5.485792439274919e-06, - "loss": 0.644, + "epoch": 0.33, + "grad_norm": 1.7751561468747112, + "learning_rate": 7.859063539481057e-06, + "loss": 0.5735, "step": 4609 }, { - "epoch": 0.49, - "grad_norm": 0.9448249147025708, - "learning_rate": 5.484096235795574e-06, - "loss": 0.5841, + "epoch": 0.33, + "grad_norm": 1.7379646131914435, + "learning_rate": 7.858120705559963e-06, + "loss": 0.5607, "step": 4610 }, { - "epoch": 0.49, - "grad_norm": 5.4569880167790625, - "learning_rate": 5.482399976075327e-06, - "loss": 0.6169, + "epoch": 0.33, + "grad_norm": 1.5623552436581924, + "learning_rate": 7.85717772066325e-06, + "loss": 0.5734, "step": 4611 }, { - "epoch": 0.49, - "grad_norm": 4.038821439830503, - "learning_rate": 5.4807036603112465e-06, - "loss": 0.6924, + "epoch": 0.33, + "grad_norm": 1.77886386217676, + "learning_rate": 7.856234584840725e-06, + "loss": 0.5581, "step": 4612 }, { - "epoch": 0.49, - "grad_norm": 2.1488134768098157, - "learning_rate": 5.479007288700403e-06, - "loss": 0.6386, + "epoch": 0.33, + "grad_norm": 1.6647345541964371, + "learning_rate": 7.855291298142214e-06, + "loss": 0.539, "step": 4613 }, { - "epoch": 0.49, - "grad_norm": 3.0696890955921132, - "learning_rate": 5.477310861439877e-06, - "loss": 0.6493, + "epoch": 0.33, + "grad_norm": 2.7969402416441036, + "learning_rate": 7.85434786061754e-06, + "loss": 0.594, "step": 4614 }, { - "epoch": 0.49, - "grad_norm": 2.5185747574560793, - "learning_rate": 5.475614378726757e-06, - "loss": 0.6438, + "epoch": 0.33, + "grad_norm": 1.6498719215756772, + "learning_rate": 7.85340427231654e-06, + "loss": 0.5527, "step": 4615 }, { - "epoch": 0.49, - "grad_norm": 2.0897327507354406, - "learning_rate": 5.4739178407581315e-06, - "loss": 0.5929, + "epoch": 0.33, + "grad_norm": 1.5256073249994104, + "learning_rate": 7.852460533289059e-06, + "loss": 0.5636, "step": 4616 }, { - "epoch": 0.49, - "grad_norm": 2.6311884639260477, - "learning_rate": 5.4722212477311025e-06, - "loss": 0.6394, + "epoch": 0.33, + "grad_norm": 1.7930427929133756, + "learning_rate": 7.851516643584947e-06, + "loss": 0.5453, "step": 4617 }, { - "epoch": 0.49, - "grad_norm": 2.269733417286331, - "learning_rate": 5.470524599842773e-06, - "loss": 0.5718, - "step": 4618 + "epoch": 0.33, + "grad_norm": 4.083199155161327, + "learning_rate": 7.850572603254064e-06, + "loss": 0.5372, + "step": 4618 }, { - "epoch": 0.49, - "grad_norm": 1.021071958636412, - "learning_rate": 5.468827897290256e-06, - "loss": 0.5784, + "epoch": 0.33, + "grad_norm": 1.6563014730117527, + "learning_rate": 7.849628412346276e-06, + "loss": 0.5376, "step": 4619 }, { - "epoch": 0.49, - "grad_norm": 3.5006735943403235, - "learning_rate": 5.46713114027067e-06, - "loss": 0.6056, + "epoch": 0.33, + "grad_norm": 1.6793216427305293, + "learning_rate": 7.848684070911458e-06, + "loss": 0.5828, "step": 4620 }, { - "epoch": 0.49, - "grad_norm": 1.0266506883030095, - "learning_rate": 5.465434328981136e-06, - "loss": 0.5808, + "epoch": 0.33, + "grad_norm": 0.836824767348362, + "learning_rate": 7.847739578999496e-06, + "loss": 0.4524, "step": 4621 }, { - "epoch": 0.49, - "grad_norm": 3.8304477124966176, - "learning_rate": 5.463737463618788e-06, - "loss": 0.7006, + "epoch": 0.33, + "grad_norm": 1.5645723098887703, + "learning_rate": 7.846794936660281e-06, + "loss": 0.5595, "step": 4622 }, { - "epoch": 0.49, - "grad_norm": 2.515291735666162, - "learning_rate": 5.462040544380764e-06, - "loss": 0.7003, + "epoch": 0.33, + "grad_norm": 1.4858043384798767, + "learning_rate": 7.845850143943709e-06, + "loss": 0.5323, "step": 4623 }, { - "epoch": 0.49, - "grad_norm": 3.387353672942185, - "learning_rate": 5.460343571464203e-06, - "loss": 0.6504, + "epoch": 0.33, + "grad_norm": 1.5141223949141884, + "learning_rate": 7.844905200899693e-06, + "loss": 0.4864, "step": 4624 }, { - "epoch": 0.49, - "grad_norm": 4.141288708078109, - "learning_rate": 5.458646545066258e-06, - "loss": 0.5682, + "epoch": 0.33, + "grad_norm": 2.1167184510443833, + "learning_rate": 7.843960107578142e-06, + "loss": 0.5286, "step": 4625 }, { - "epoch": 0.49, - "grad_norm": 2.5542956306423674, - "learning_rate": 5.456949465384082e-06, - "loss": 0.6874, + "epoch": 0.33, + "grad_norm": 1.5840746989672634, + "learning_rate": 7.843014864028981e-06, + "loss": 0.5395, "step": 4626 }, { - "epoch": 0.49, - "grad_norm": 2.9922167702054705, - "learning_rate": 5.455252332614839e-06, - "loss": 0.6838, + "epoch": 0.33, + "grad_norm": 2.668708501608903, + "learning_rate": 7.842069470302143e-06, + "loss": 0.5782, "step": 4627 }, { - "epoch": 0.49, - "grad_norm": 2.914946760609991, - "learning_rate": 5.453555146955696e-06, - "loss": 0.6331, + "epoch": 0.33, + "grad_norm": 1.5108115132468567, + "learning_rate": 7.841123926447565e-06, + "loss": 0.5882, "step": 4628 }, { - "epoch": 0.49, - "grad_norm": 3.352573285600877, - "learning_rate": 5.451857908603826e-06, - "loss": 0.635, + "epoch": 0.33, + "grad_norm": 1.8768684469320043, + "learning_rate": 7.840178232515192e-06, + "loss": 0.571, "step": 4629 }, { - "epoch": 0.49, - "grad_norm": 2.6733703812603755, - "learning_rate": 5.450160617756411e-06, - "loss": 0.6508, + "epoch": 0.33, + "grad_norm": 0.7977295751028157, + "learning_rate": 7.839232388554982e-06, + "loss": 0.4551, "step": 4630 }, { - "epoch": 0.49, - "grad_norm": 2.349826396279672, - "learning_rate": 5.448463274610637e-06, - "loss": 0.6848, + "epoch": 0.33, + "grad_norm": 3.272430883247567, + "learning_rate": 7.838286394616898e-06, + "loss": 0.5961, "step": 4631 }, { - "epoch": 0.49, - "grad_norm": 5.81980906330448, - "learning_rate": 5.446765879363697e-06, - "loss": 0.6457, + "epoch": 0.33, + "grad_norm": 1.66954679938459, + "learning_rate": 7.837340250750909e-06, + "loss": 0.5214, "step": 4632 }, { - "epoch": 0.49, - "grad_norm": 2.651707782689453, - "learning_rate": 5.445068432212787e-06, - "loss": 0.6972, + "epoch": 0.33, + "grad_norm": 1.591472261671352, + "learning_rate": 7.836393957006993e-06, + "loss": 0.5559, "step": 4633 }, { - "epoch": 0.49, - "grad_norm": 2.3655792841646512, - "learning_rate": 5.443370933355114e-06, - "loss": 0.5924, + "epoch": 0.33, + "grad_norm": 1.6409916784730205, + "learning_rate": 7.835447513435136e-06, + "loss": 0.5219, "step": 4634 }, { - "epoch": 0.49, - "grad_norm": 2.7313353670254203, - "learning_rate": 5.441673382987886e-06, - "loss": 0.616, + "epoch": 0.33, + "grad_norm": 1.6146693010694948, + "learning_rate": 7.834500920085337e-06, + "loss": 0.5827, "step": 4635 }, { - "epoch": 0.49, - "grad_norm": 2.4130469073654335, - "learning_rate": 5.439975781308322e-06, - "loss": 0.6315, + "epoch": 0.33, + "grad_norm": 3.706783098139606, + "learning_rate": 7.833554177007592e-06, + "loss": 0.5125, "step": 4636 }, { - "epoch": 0.49, - "grad_norm": 3.597140459042695, - "learning_rate": 5.4382781285136445e-06, - "loss": 0.6355, + "epoch": 0.33, + "grad_norm": 2.0514074933372104, + "learning_rate": 7.832607284251913e-06, + "loss": 0.5583, "step": 4637 }, { - "epoch": 0.49, - "grad_norm": 2.2052663360438993, - "learning_rate": 5.436580424801081e-06, - "loss": 0.6403, + "epoch": 0.33, + "grad_norm": 1.9033711019814257, + "learning_rate": 7.831660241868322e-06, + "loss": 0.5539, "step": 4638 }, { - "epoch": 0.49, - "grad_norm": 2.0955992547403572, - "learning_rate": 5.434882670367865e-06, - "loss": 0.5755, + "epoch": 0.33, + "grad_norm": 2.2857120424115993, + "learning_rate": 7.83071304990684e-06, + "loss": 0.6027, "step": 4639 }, { - "epoch": 0.49, - "grad_norm": 2.7478098709325316, - "learning_rate": 5.4331848654112374e-06, - "loss": 0.713, + "epoch": 0.33, + "grad_norm": 1.4285901373059415, + "learning_rate": 7.829765708417503e-06, + "loss": 0.5361, "step": 4640 }, { - "epoch": 0.49, - "grad_norm": 3.099989483770273, - "learning_rate": 5.431487010128445e-06, - "loss": 0.6117, + "epoch": 0.33, + "grad_norm": 1.5779745956193771, + "learning_rate": 7.828818217450355e-06, + "loss": 0.5956, "step": 4641 }, { - "epoch": 0.49, - "grad_norm": 2.7349941290390514, - "learning_rate": 5.4297891047167385e-06, - "loss": 0.6223, + "epoch": 0.33, + "grad_norm": 1.697777139088535, + "learning_rate": 7.82787057705544e-06, + "loss": 0.5605, "step": 4642 }, { - "epoch": 0.49, - "grad_norm": 1.0659832601686883, - "learning_rate": 5.428091149373377e-06, - "loss": 0.5199, + "epoch": 0.33, + "grad_norm": 2.568037101748435, + "learning_rate": 7.826922787282823e-06, + "loss": 0.5001, "step": 4643 }, { - "epoch": 0.49, - "grad_norm": 9.61686418980415, - "learning_rate": 5.426393144295623e-06, - "loss": 0.6323, + "epoch": 0.33, + "grad_norm": 1.5528532343978865, + "learning_rate": 7.825974848182562e-06, + "loss": 0.5233, "step": 4644 }, { - "epoch": 0.49, - "grad_norm": 3.013180159122992, - "learning_rate": 5.4246950896807445e-06, - "loss": 0.6634, + "epoch": 0.33, + "grad_norm": 2.0639953023548263, + "learning_rate": 7.825026759804735e-06, + "loss": 0.5712, "step": 4645 }, { - "epoch": 0.49, - "grad_norm": 3.7524905717817365, - "learning_rate": 5.422996985726019e-06, - "loss": 0.5842, + "epoch": 0.33, + "grad_norm": 1.5706428731727844, + "learning_rate": 7.824078522199422e-06, + "loss": 0.5402, "step": 4646 }, { - "epoch": 0.49, - "grad_norm": 2.2515177137365296, - "learning_rate": 5.421298832628729e-06, - "loss": 0.6197, + "epoch": 0.33, + "grad_norm": 1.9357097742177605, + "learning_rate": 7.823130135416713e-06, + "loss": 0.6155, "step": 4647 }, { - "epoch": 0.49, - "grad_norm": 3.427073007294642, - "learning_rate": 5.419600630586155e-06, - "loss": 0.6538, + "epoch": 0.33, + "grad_norm": 1.6616978216312759, + "learning_rate": 7.822181599506704e-06, + "loss": 0.5439, "step": 4648 }, { - "epoch": 0.49, - "grad_norm": 2.743461241178892, - "learning_rate": 5.417902379795593e-06, - "loss": 0.6647, + "epoch": 0.33, + "grad_norm": 0.884321109577002, + "learning_rate": 7.821232914519498e-06, + "loss": 0.4549, "step": 4649 }, { - "epoch": 0.49, - "grad_norm": 2.3931710581662897, - "learning_rate": 5.416204080454343e-06, - "loss": 0.6355, + "epoch": 0.33, + "grad_norm": 1.5384978611034608, + "learning_rate": 7.820284080505211e-06, + "loss": 0.5461, "step": 4650 }, { - "epoch": 0.49, - "grad_norm": 3.8194753476800356, - "learning_rate": 5.414505732759704e-06, - "loss": 0.6645, + "epoch": 0.33, + "grad_norm": 0.7098303035460066, + "learning_rate": 7.819335097513962e-06, + "loss": 0.4545, "step": 4651 }, { - "epoch": 0.49, - "grad_norm": 2.6326764418481194, - "learning_rate": 5.412807336908987e-06, - "loss": 0.6086, + "epoch": 0.33, + "grad_norm": 1.5906438671135015, + "learning_rate": 7.818385965595883e-06, + "loss": 0.5406, "step": 4652 }, { - "epoch": 0.49, - "grad_norm": 2.284399679999572, - "learning_rate": 5.411108893099508e-06, - "loss": 0.5637, + "epoch": 0.33, + "grad_norm": 1.4788236724338404, + "learning_rate": 7.817436684801102e-06, + "loss": 0.5397, "step": 4653 }, { - "epoch": 0.49, - "grad_norm": 6.7384502492836775, - "learning_rate": 5.409410401528586e-06, - "loss": 0.622, + "epoch": 0.33, + "grad_norm": 0.7175289152082126, + "learning_rate": 7.816487255179775e-06, + "loss": 0.4536, "step": 4654 }, { - "epoch": 0.49, - "grad_norm": 3.61822403374306, - "learning_rate": 5.4077118623935476e-06, - "loss": 0.6361, + "epoch": 0.33, + "grad_norm": 1.5646691128932102, + "learning_rate": 7.815537676782043e-06, + "loss": 0.5402, "step": 4655 }, { - "epoch": 0.49, - "grad_norm": 4.2720764272007425, - "learning_rate": 5.406013275891723e-06, - "loss": 0.7198, + "epoch": 0.33, + "grad_norm": 1.6355933689192153, + "learning_rate": 7.814587949658073e-06, + "loss": 0.5537, "step": 4656 }, { - "epoch": 0.49, - "grad_norm": 3.533871073074416, - "learning_rate": 5.404314642220448e-06, - "loss": 0.6521, + "epoch": 0.33, + "grad_norm": 1.5262555981607469, + "learning_rate": 7.813638073858027e-06, + "loss": 0.5469, "step": 4657 }, { - "epoch": 0.49, - "grad_norm": 2.5014645838243834, - "learning_rate": 5.40261596157707e-06, - "loss": 0.623, + "epoch": 0.33, + "grad_norm": 1.7682879304286492, + "learning_rate": 7.812688049432087e-06, + "loss": 0.5771, "step": 4658 }, { - "epoch": 0.49, - "grad_norm": 5.187282413928454, - "learning_rate": 5.40091723415893e-06, - "loss": 0.6687, + "epoch": 0.33, + "grad_norm": 0.7736915895546868, + "learning_rate": 7.811737876430432e-06, + "loss": 0.4589, "step": 4659 }, { - "epoch": 0.49, - "grad_norm": 2.210081592011959, - "learning_rate": 5.399218460163387e-06, - "loss": 0.5783, + "epoch": 0.33, + "grad_norm": 1.7409465534816897, + "learning_rate": 7.810787554903257e-06, + "loss": 0.5737, "step": 4660 }, { - "epoch": 0.49, - "grad_norm": 2.9686826745928863, - "learning_rate": 5.397519639787796e-06, - "loss": 0.6148, + "epoch": 0.33, + "grad_norm": 1.6073603053534078, + "learning_rate": 7.809837084900757e-06, + "loss": 0.5384, "step": 4661 }, { - "epoch": 0.49, - "grad_norm": 2.8313621442070436, - "learning_rate": 5.395820773229523e-06, - "loss": 0.6224, + "epoch": 0.33, + "grad_norm": 2.2049348617365907, + "learning_rate": 7.808886466473142e-06, + "loss": 0.5339, "step": 4662 }, { - "epoch": 0.49, - "grad_norm": 3.837090983146463, - "learning_rate": 5.394121860685937e-06, - "loss": 0.5873, + "epoch": 0.33, + "grad_norm": 1.5288906332768764, + "learning_rate": 7.807935699670625e-06, + "loss": 0.5647, "step": 4663 }, { - "epoch": 0.49, - "grad_norm": 2.7825956148863606, - "learning_rate": 5.392422902354413e-06, - "loss": 0.6691, + "epoch": 0.33, + "grad_norm": 1.6497824133016323, + "learning_rate": 7.80698478454343e-06, + "loss": 0.6012, "step": 4664 }, { - "epoch": 0.49, - "grad_norm": 3.3329282867648136, - "learning_rate": 5.39072389843233e-06, - "loss": 0.6057, + "epoch": 0.33, + "grad_norm": 1.9596783955684993, + "learning_rate": 7.806033721141788e-06, + "loss": 0.5904, "step": 4665 }, { - "epoch": 0.49, - "grad_norm": 3.5608187127868387, - "learning_rate": 5.389024849117074e-06, - "loss": 0.705, + "epoch": 0.33, + "grad_norm": 1.8917464137497104, + "learning_rate": 7.805082509515937e-06, + "loss": 0.6102, "step": 4666 }, { - "epoch": 0.49, - "grad_norm": 2.706689822341293, - "learning_rate": 5.387325754606035e-06, - "loss": 0.7137, + "epoch": 0.33, + "grad_norm": 1.7388903932913622, + "learning_rate": 7.804131149716123e-06, + "loss": 0.5014, "step": 4667 }, { - "epoch": 0.49, - "grad_norm": 2.5224351370813674, - "learning_rate": 5.3856266150966094e-06, - "loss": 0.6446, + "epoch": 0.33, + "grad_norm": 1.9948965654940347, + "learning_rate": 7.8031796417926e-06, + "loss": 0.5463, "step": 4668 }, { - "epoch": 0.49, - "grad_norm": 4.120747037478334, - "learning_rate": 5.3839274307862e-06, - "loss": 0.6706, + "epoch": 0.33, + "grad_norm": 1.5210416185324516, + "learning_rate": 7.80222798579563e-06, + "loss": 0.5249, "step": 4669 }, { - "epoch": 0.49, - "grad_norm": 3.6521412088636325, - "learning_rate": 5.3822282018722085e-06, - "loss": 0.7024, + "epoch": 0.33, + "grad_norm": 1.6737590323359803, + "learning_rate": 7.801276181775482e-06, + "loss": 0.5715, "step": 4670 }, { - "epoch": 0.49, - "grad_norm": 2.364628947418871, - "learning_rate": 5.380528928552052e-06, - "loss": 0.7261, + "epoch": 0.33, + "grad_norm": 1.6468094923052277, + "learning_rate": 7.800324229782432e-06, + "loss": 0.5528, "step": 4671 }, { - "epoch": 0.49, - "grad_norm": 2.7339482051464534, - "learning_rate": 5.378829611023144e-06, - "loss": 0.6781, + "epoch": 0.33, + "grad_norm": 1.6978241200573425, + "learning_rate": 7.799372129866768e-06, + "loss": 0.6016, "step": 4672 }, { - "epoch": 0.49, - "grad_norm": 2.478305759343481, - "learning_rate": 5.377130249482907e-06, - "loss": 0.6225, + "epoch": 0.33, + "grad_norm": 1.720449361605801, + "learning_rate": 7.798419882078786e-06, + "loss": 0.5923, "step": 4673 }, { - "epoch": 0.49, - "grad_norm": 2.411215799916555, - "learning_rate": 5.3754308441287675e-06, - "loss": 0.6103, + "epoch": 0.33, + "grad_norm": 2.159172380797641, + "learning_rate": 7.797467486468781e-06, + "loss": 0.5161, "step": 4674 }, { - "epoch": 0.49, - "grad_norm": 2.42557822909133, - "learning_rate": 5.3737313951581575e-06, - "loss": 0.5748, + "epoch": 0.33, + "grad_norm": 1.8250232638762027, + "learning_rate": 7.796514943087065e-06, + "loss": 0.6106, "step": 4675 }, { - "epoch": 0.49, - "grad_norm": 2.6013955730359046, - "learning_rate": 5.372031902768514e-06, - "loss": 0.5715, + "epoch": 0.33, + "grad_norm": 0.8462319548833942, + "learning_rate": 7.795562251983953e-06, + "loss": 0.4868, "step": 4676 }, { - "epoch": 0.49, - "grad_norm": 3.1819025201627946, - "learning_rate": 5.370332367157281e-06, - "loss": 0.7339, + "epoch": 0.33, + "grad_norm": 1.7401367530245215, + "learning_rate": 7.794609413209771e-06, + "loss": 0.632, "step": 4677 }, { - "epoch": 0.49, - "grad_norm": 2.588545731682473, - "learning_rate": 5.368632788521903e-06, - "loss": 0.6315, + "epoch": 0.33, + "grad_norm": 1.675446822150429, + "learning_rate": 7.79365642681485e-06, + "loss": 0.5492, "step": 4678 }, { - "epoch": 0.49, - "grad_norm": 3.008669398229263, - "learning_rate": 5.3669331670598335e-06, - "loss": 0.7626, + "epoch": 0.33, + "grad_norm": 1.6077974719649235, + "learning_rate": 7.792703292849531e-06, + "loss": 0.5525, "step": 4679 }, { - "epoch": 0.49, - "grad_norm": 3.43609159650542, - "learning_rate": 5.36523350296853e-06, - "loss": 0.7089, + "epoch": 0.33, + "grad_norm": 1.8902119472429122, + "learning_rate": 7.79175001136416e-06, + "loss": 0.5148, "step": 4680 }, { - "epoch": 0.49, - "grad_norm": 3.121151492979661, - "learning_rate": 5.363533796445452e-06, - "loss": 0.6616, + "epoch": 0.33, + "grad_norm": 1.7726909150681227, + "learning_rate": 7.790796582409093e-06, + "loss": 0.5685, "step": 4681 }, { - "epoch": 0.49, - "grad_norm": 2.7034182260790494, - "learning_rate": 5.361834047688071e-06, - "loss": 0.6528, + "epoch": 0.33, + "grad_norm": 1.4733350733383128, + "learning_rate": 7.789843006034697e-06, + "loss": 0.5679, "step": 4682 }, { - "epoch": 0.49, - "grad_norm": 5.478076643546118, - "learning_rate": 5.360134256893854e-06, - "loss": 0.6114, + "epoch": 0.33, + "grad_norm": 2.028608192113872, + "learning_rate": 7.788889282291337e-06, + "loss": 0.6431, "step": 4683 }, { - "epoch": 0.49, - "grad_norm": 2.6079287424630264, - "learning_rate": 5.35843442426028e-06, - "loss": 0.6421, + "epoch": 0.33, + "grad_norm": 1.6052019718392103, + "learning_rate": 7.787935411229396e-06, + "loss": 0.5309, "step": 4684 }, { - "epoch": 0.49, - "grad_norm": 2.685462278835273, - "learning_rate": 5.356734549984832e-06, - "loss": 0.5111, + "epoch": 0.33, + "grad_norm": 1.8103108674078487, + "learning_rate": 7.78698139289926e-06, + "loss": 0.529, "step": 4685 }, { - "epoch": 0.49, - "grad_norm": 2.979817349950144, - "learning_rate": 5.355034634264996e-06, - "loss": 0.6508, + "epoch": 0.33, + "grad_norm": 1.5621525548778643, + "learning_rate": 7.786027227351324e-06, + "loss": 0.601, "step": 4686 }, { - "epoch": 0.49, - "grad_norm": 3.0200409785087854, - "learning_rate": 5.353334677298261e-06, - "loss": 0.6656, + "epoch": 0.33, + "grad_norm": 2.078850293911053, + "learning_rate": 7.78507291463599e-06, + "loss": 0.6155, "step": 4687 }, { - "epoch": 0.49, - "grad_norm": 2.646554726463927, - "learning_rate": 5.351634679282125e-06, - "loss": 0.5935, + "epoch": 0.33, + "grad_norm": 1.7318182884196307, + "learning_rate": 7.784118454803665e-06, + "loss": 0.5006, "step": 4688 }, { - "epoch": 0.49, - "grad_norm": 2.6235037363889915, - "learning_rate": 5.349934640414089e-06, - "loss": 0.6812, + "epoch": 0.33, + "grad_norm": 1.8720910735795302, + "learning_rate": 7.783163847904772e-06, + "loss": 0.6014, "step": 4689 }, { - "epoch": 0.49, - "grad_norm": 9.07421728360287, - "learning_rate": 5.348234560891657e-06, - "loss": 0.6569, + "epoch": 0.33, + "grad_norm": 1.6110359225124027, + "learning_rate": 7.782209093989731e-06, + "loss": 0.6003, "step": 4690 }, { - "epoch": 0.49, - "grad_norm": 36.97843704858725, - "learning_rate": 5.346534440912341e-06, - "loss": 0.6354, + "epoch": 0.33, + "grad_norm": 1.8686830582716514, + "learning_rate": 7.78125419310898e-06, + "loss": 0.5923, "step": 4691 }, { - "epoch": 0.49, - "grad_norm": 2.4594375427225232, - "learning_rate": 5.3448342806736545e-06, - "loss": 0.6174, + "epoch": 0.33, + "grad_norm": 1.6763669281892364, + "learning_rate": 7.780299145312959e-06, + "loss": 0.4832, "step": 4692 }, { - "epoch": 0.49, - "grad_norm": 2.8018238508168425, - "learning_rate": 5.34313408037312e-06, - "loss": 0.5667, + "epoch": 0.33, + "grad_norm": 1.7024436081433396, + "learning_rate": 7.779343950652113e-06, + "loss": 0.6448, "step": 4693 }, { - "epoch": 0.49, - "grad_norm": 3.009916868253691, - "learning_rate": 5.341433840208258e-06, - "loss": 0.7038, + "epoch": 0.33, + "grad_norm": 1.4859738838254628, + "learning_rate": 7.778388609176907e-06, + "loss": 0.5611, "step": 4694 }, { - "epoch": 0.49, - "grad_norm": 4.258548590941279, - "learning_rate": 5.339733560376601e-06, - "loss": 0.719, + "epoch": 0.33, + "grad_norm": 1.608771143975767, + "learning_rate": 7.777433120937797e-06, + "loss": 0.5555, "step": 4695 }, { - "epoch": 0.49, - "grad_norm": 2.61623774709776, - "learning_rate": 5.33803324107568e-06, - "loss": 0.7362, + "epoch": 0.33, + "grad_norm": 2.0446063984731637, + "learning_rate": 7.776477485985258e-06, + "loss": 0.6412, "step": 4696 }, { - "epoch": 0.49, - "grad_norm": 2.7165699079262935, - "learning_rate": 5.336332882503034e-06, - "loss": 0.6397, + "epoch": 0.33, + "grad_norm": 2.403472528777076, + "learning_rate": 7.77552170436977e-06, + "loss": 0.6635, "step": 4697 }, { - "epoch": 0.49, - "grad_norm": 2.479128754751253, - "learning_rate": 5.334632484856206e-06, - "loss": 0.6062, + "epoch": 0.33, + "grad_norm": 1.7455552603559945, + "learning_rate": 7.774565776141822e-06, + "loss": 0.5759, "step": 4698 }, { - "epoch": 0.49, - "grad_norm": 3.0822937031597597, - "learning_rate": 5.332932048332744e-06, - "loss": 0.6856, + "epoch": 0.33, + "grad_norm": 0.7483085435675426, + "learning_rate": 7.773609701351909e-06, + "loss": 0.4671, "step": 4699 }, { - "epoch": 0.49, - "grad_norm": 2.670085477402618, - "learning_rate": 5.331231573130199e-06, - "loss": 0.6451, + "epoch": 0.33, + "grad_norm": 2.0204353213160084, + "learning_rate": 7.772653480050531e-06, + "loss": 0.5978, "step": 4700 }, { - "epoch": 0.49, - "grad_norm": 2.710855683571757, - "learning_rate": 5.329531059446127e-06, - "loss": 0.5651, + "epoch": 0.33, + "grad_norm": 1.6211656413041549, + "learning_rate": 7.771697112288204e-06, + "loss": 0.5429, "step": 4701 }, { - "epoch": 0.49, - "grad_norm": 4.256008895457131, - "learning_rate": 5.327830507478089e-06, - "loss": 0.6252, + "epoch": 0.33, + "grad_norm": 2.0738403207620575, + "learning_rate": 7.770740598115442e-06, + "loss": 0.5899, "step": 4702 }, { - "epoch": 0.49, - "grad_norm": 2.1096958021617995, - "learning_rate": 5.32612991742365e-06, - "loss": 0.5849, + "epoch": 0.33, + "grad_norm": 0.7118212212258338, + "learning_rate": 7.769783937582773e-06, + "loss": 0.4183, "step": 4703 }, { - "epoch": 0.5, - "grad_norm": 3.5793139887216263, - "learning_rate": 5.32442928948038e-06, - "loss": 0.6785, + "epoch": 0.33, + "grad_norm": 1.4709072674335257, + "learning_rate": 7.768827130740731e-06, + "loss": 0.6237, "step": 4704 }, { - "epoch": 0.5, - "grad_norm": 2.7963244411549644, - "learning_rate": 5.322728623845853e-06, - "loss": 0.6408, + "epoch": 0.33, + "grad_norm": 1.7383033033246675, + "learning_rate": 7.767870177639858e-06, + "loss": 0.5343, "step": 4705 }, { - "epoch": 0.5, - "grad_norm": 2.182317971739211, - "learning_rate": 5.321027920717649e-06, - "loss": 0.6363, + "epoch": 0.33, + "grad_norm": 1.4054312290736928, + "learning_rate": 7.766913078330704e-06, + "loss": 0.5789, "step": 4706 }, { - "epoch": 0.5, - "grad_norm": 3.7507644271214393, - "learning_rate": 5.319327180293347e-06, - "loss": 0.635, + "epoch": 0.33, + "grad_norm": 1.703327255682883, + "learning_rate": 7.765955832863826e-06, + "loss": 0.5647, "step": 4707 }, { - "epoch": 0.5, - "grad_norm": 2.075170533407163, - "learning_rate": 5.317626402770537e-06, - "loss": 0.6228, + "epoch": 0.33, + "grad_norm": 1.6218603614231288, + "learning_rate": 7.764998441289787e-06, + "loss": 0.5393, "step": 4708 }, { - "epoch": 0.5, - "grad_norm": 2.841933692244469, - "learning_rate": 5.3159255883468095e-06, - "loss": 0.6796, + "epoch": 0.33, + "grad_norm": 1.7264969920839324, + "learning_rate": 7.764040903659163e-06, + "loss": 0.5241, "step": 4709 }, { - "epoch": 0.5, - "grad_norm": 3.6806157165268347, - "learning_rate": 5.314224737219761e-06, - "loss": 0.6646, + "epoch": 0.33, + "grad_norm": 1.4741356557590197, + "learning_rate": 7.763083220022531e-06, + "loss": 0.4985, "step": 4710 }, { - "epoch": 0.5, - "grad_norm": 2.533486664828576, - "learning_rate": 5.31252384958699e-06, - "loss": 0.711, + "epoch": 0.33, + "grad_norm": 1.837266823424641, + "learning_rate": 7.762125390430482e-06, + "loss": 0.6346, "step": 4711 }, { - "epoch": 0.5, - "grad_norm": 2.7604461850517406, - "learning_rate": 5.310822925646103e-06, - "loss": 0.604, + "epoch": 0.33, + "grad_norm": 0.7609131160799693, + "learning_rate": 7.76116741493361e-06, + "loss": 0.4809, "step": 4712 }, { - "epoch": 0.5, - "grad_norm": 2.578358994492397, - "learning_rate": 5.309121965594706e-06, - "loss": 0.6351, + "epoch": 0.33, + "grad_norm": 1.4894214595253323, + "learning_rate": 7.760209293582518e-06, + "loss": 0.548, "step": 4713 }, { - "epoch": 0.5, - "grad_norm": 2.579513564368866, - "learning_rate": 5.307420969630412e-06, - "loss": 0.7123, + "epoch": 0.33, + "grad_norm": 1.7235658230972803, + "learning_rate": 7.759251026427819e-06, + "loss": 0.5465, "step": 4714 }, { - "epoch": 0.5, - "grad_norm": 3.582532356341639, - "learning_rate": 5.30571993795084e-06, - "loss": 0.6549, + "epoch": 0.33, + "grad_norm": 1.8244942025338575, + "learning_rate": 7.758292613520131e-06, + "loss": 0.61, "step": 4715 }, { - "epoch": 0.5, - "grad_norm": 2.5183375541671986, - "learning_rate": 5.304018870753608e-06, - "loss": 0.6463, + "epoch": 0.33, + "grad_norm": 1.9880235823163546, + "learning_rate": 7.757334054910082e-06, + "loss": 0.5765, "step": 4716 }, { - "epoch": 0.5, - "grad_norm": 1.0420447788319127, - "learning_rate": 5.3023177682363435e-06, - "loss": 0.5367, + "epoch": 0.33, + "grad_norm": 1.7506339794659247, + "learning_rate": 7.756375350648303e-06, + "loss": 0.5683, "step": 4717 }, { - "epoch": 0.5, - "grad_norm": 2.294424245337193, - "learning_rate": 5.300616630596673e-06, - "loss": 0.6515, + "epoch": 0.33, + "grad_norm": 1.479877763456396, + "learning_rate": 7.75541650078544e-06, + "loss": 0.4923, "step": 4718 }, { - "epoch": 0.5, - "grad_norm": 1.0335405308223342, - "learning_rate": 5.298915458032233e-06, - "loss": 0.5989, + "epoch": 0.33, + "grad_norm": 1.6462878786781092, + "learning_rate": 7.75445750537214e-06, + "loss": 0.6352, "step": 4719 }, { - "epoch": 0.5, - "grad_norm": 1.0427016248786705, - "learning_rate": 5.297214250740658e-06, - "loss": 0.5918, + "epoch": 0.33, + "grad_norm": 1.7553436849151027, + "learning_rate": 7.753498364459062e-06, + "loss": 0.5523, "step": 4720 }, { - "epoch": 0.5, - "grad_norm": 3.004325778129872, - "learning_rate": 5.295513008919592e-06, - "loss": 0.6773, + "epoch": 0.34, + "grad_norm": 1.650376751990671, + "learning_rate": 7.752539078096869e-06, + "loss": 0.5208, "step": 4721 }, { - "epoch": 0.5, - "grad_norm": 2.3701521965706323, - "learning_rate": 5.293811732766677e-06, - "loss": 0.6755, + "epoch": 0.34, + "grad_norm": 0.7900135707471028, + "learning_rate": 7.751579646336235e-06, + "loss": 0.4382, "step": 4722 }, { - "epoch": 0.5, - "grad_norm": 3.111246958207434, - "learning_rate": 5.292110422479565e-06, - "loss": 0.6284, + "epoch": 0.34, + "grad_norm": 2.669318441616321, + "learning_rate": 7.750620069227842e-06, + "loss": 0.4512, "step": 4723 }, { - "epoch": 0.5, - "grad_norm": 3.018425742396122, - "learning_rate": 5.290409078255909e-06, - "loss": 0.6031, + "epoch": 0.34, + "grad_norm": 1.575551780770198, + "learning_rate": 7.749660346822377e-06, + "loss": 0.5665, "step": 4724 }, { - "epoch": 0.5, - "grad_norm": 0.9869690779073568, - "learning_rate": 5.288707700293365e-06, - "loss": 0.5695, + "epoch": 0.34, + "grad_norm": 0.8105189893333551, + "learning_rate": 7.748700479170532e-06, + "loss": 0.4569, "step": 4725 }, { - "epoch": 0.5, - "grad_norm": 1.1072157586028053, - "learning_rate": 5.287006288789596e-06, - "loss": 0.5593, + "epoch": 0.34, + "grad_norm": 1.5535775044271178, + "learning_rate": 7.747740466323016e-06, + "loss": 0.497, "step": 4726 }, { - "epoch": 0.5, - "grad_norm": 2.3663187157291676, - "learning_rate": 5.285304843942265e-06, - "loss": 0.702, + "epoch": 0.34, + "grad_norm": 1.6290112308344942, + "learning_rate": 7.746780308330539e-06, + "loss": 0.587, "step": 4727 }, { - "epoch": 0.5, - "grad_norm": 9.046850596094636, - "learning_rate": 5.283603365949043e-06, - "loss": 0.7287, + "epoch": 0.34, + "grad_norm": 1.6659619343101402, + "learning_rate": 7.745820005243816e-06, + "loss": 0.5553, "step": 4728 }, { - "epoch": 0.5, - "grad_norm": 0.9462619367998221, - "learning_rate": 5.2819018550076e-06, - "loss": 0.5409, + "epoch": 0.34, + "grad_norm": 1.4327982714612173, + "learning_rate": 7.744859557113577e-06, + "loss": 0.5269, "step": 4729 }, { - "epoch": 0.5, - "grad_norm": 2.1605371729694633, - "learning_rate": 5.280200311315616e-06, - "loss": 0.6896, + "epoch": 0.34, + "grad_norm": 1.8245366955185995, + "learning_rate": 7.743898963990557e-06, + "loss": 0.5622, "step": 4730 }, { - "epoch": 0.5, - "grad_norm": 2.9185781197431124, - "learning_rate": 5.278498735070769e-06, - "loss": 0.6377, + "epoch": 0.34, + "grad_norm": 2.2535716907793173, + "learning_rate": 7.742938225925495e-06, + "loss": 0.6181, "step": 4731 }, { - "epoch": 0.5, - "grad_norm": 3.5719498173680098, - "learning_rate": 5.2767971264707445e-06, - "loss": 0.6695, + "epoch": 0.34, + "grad_norm": 1.6299817792803417, + "learning_rate": 7.741977342969143e-06, + "loss": 0.5564, "step": 4732 }, { - "epoch": 0.5, - "grad_norm": 2.8324365081609333, - "learning_rate": 5.27509548571323e-06, - "loss": 0.5886, + "epoch": 0.34, + "grad_norm": 1.4963578845502292, + "learning_rate": 7.741016315172254e-06, + "loss": 0.5687, "step": 4733 }, { - "epoch": 0.5, - "grad_norm": 2.705346452985015, - "learning_rate": 5.273393812995917e-06, - "loss": 0.6509, + "epoch": 0.34, + "grad_norm": 1.5991613573377579, + "learning_rate": 7.740055142585597e-06, + "loss": 0.5763, "step": 4734 }, { - "epoch": 0.5, - "grad_norm": 3.9814173861586815, - "learning_rate": 5.271692108516501e-06, - "loss": 0.7016, + "epoch": 0.34, + "grad_norm": 1.63434430382507, + "learning_rate": 7.739093825259943e-06, + "loss": 0.5345, "step": 4735 }, { - "epoch": 0.5, - "grad_norm": 6.424011098877338, - "learning_rate": 5.269990372472682e-06, - "loss": 0.6616, + "epoch": 0.34, + "grad_norm": 1.8867459904344575, + "learning_rate": 7.738132363246072e-06, + "loss": 0.5427, "step": 4736 }, { - "epoch": 0.5, - "grad_norm": 2.1493045527463837, - "learning_rate": 5.2682886050621604e-06, - "loss": 0.6617, + "epoch": 0.34, + "grad_norm": 1.6280003008213626, + "learning_rate": 7.737170756594771e-06, + "loss": 0.5637, "step": 4737 }, { - "epoch": 0.5, - "grad_norm": 3.716615071908531, - "learning_rate": 5.266586806482646e-06, - "loss": 0.6362, + "epoch": 0.34, + "grad_norm": 1.6247668558943018, + "learning_rate": 7.736209005356835e-06, + "loss": 0.5239, "step": 4738 }, { - "epoch": 0.5, - "grad_norm": 3.118626444685795, - "learning_rate": 5.264884976931845e-06, - "loss": 0.6424, + "epoch": 0.34, + "grad_norm": 1.849762119696607, + "learning_rate": 7.735247109583068e-06, + "loss": 0.5676, "step": 4739 }, { - "epoch": 0.5, - "grad_norm": 2.541880011041433, - "learning_rate": 5.263183116607474e-06, - "loss": 0.5951, + "epoch": 0.34, + "grad_norm": 1.5924661040327408, + "learning_rate": 7.73428506932428e-06, + "loss": 0.5506, "step": 4740 }, { - "epoch": 0.5, - "grad_norm": 1.1285558503464246, - "learning_rate": 5.261481225707251e-06, - "loss": 0.588, + "epoch": 0.34, + "grad_norm": 0.8303048966748472, + "learning_rate": 7.733322884631293e-06, + "loss": 0.4793, "step": 4741 }, { - "epoch": 0.5, - "grad_norm": 2.916993130135385, - "learning_rate": 5.259779304428893e-06, - "loss": 0.6042, + "epoch": 0.34, + "grad_norm": 1.6071786164591642, + "learning_rate": 7.732360555554927e-06, + "loss": 0.5286, "step": 4742 }, { - "epoch": 0.5, - "grad_norm": 2.7254833052810423, - "learning_rate": 5.258077352970128e-06, - "loss": 0.6621, + "epoch": 0.34, + "grad_norm": 1.589172663553812, + "learning_rate": 7.731398082146017e-06, + "loss": 0.5105, "step": 4743 }, { - "epoch": 0.5, - "grad_norm": 3.462602034967814, - "learning_rate": 5.256375371528681e-06, - "loss": 0.5998, + "epoch": 0.34, + "grad_norm": 0.8582754112806343, + "learning_rate": 7.730435464455406e-06, + "loss": 0.4624, "step": 4744 }, { - "epoch": 0.5, - "grad_norm": 2.520686792805083, - "learning_rate": 5.254673360302284e-06, - "loss": 0.6472, + "epoch": 0.34, + "grad_norm": 1.6061851343439688, + "learning_rate": 7.729472702533942e-06, + "loss": 0.5681, "step": 4745 }, { - "epoch": 0.5, - "grad_norm": 3.459934646842888, - "learning_rate": 5.252971319488672e-06, - "loss": 0.6504, + "epoch": 0.34, + "grad_norm": 1.995724518229646, + "learning_rate": 7.728509796432482e-06, + "loss": 0.641, "step": 4746 }, { - "epoch": 0.5, - "grad_norm": 1.0506864779125666, - "learning_rate": 5.2512692492855845e-06, - "loss": 0.5606, + "epoch": 0.34, + "grad_norm": 2.0959088797657786, + "learning_rate": 7.727546746201887e-06, + "loss": 0.5437, "step": 4747 }, { - "epoch": 0.5, - "grad_norm": 2.191202095280542, - "learning_rate": 5.249567149890762e-06, - "loss": 0.6614, + "epoch": 0.34, + "grad_norm": 1.4560839946130193, + "learning_rate": 7.726583551893033e-06, + "loss": 0.5337, "step": 4748 }, { - "epoch": 0.5, - "grad_norm": 10.032018653356111, - "learning_rate": 5.247865021501949e-06, - "loss": 0.608, + "epoch": 0.34, + "grad_norm": 1.9447346179638094, + "learning_rate": 7.725620213556795e-06, + "loss": 0.6548, "step": 4749 }, { - "epoch": 0.5, - "grad_norm": 3.5143563476752124, - "learning_rate": 5.2461628643168935e-06, - "loss": 0.5829, + "epoch": 0.34, + "grad_norm": 1.6064663516213205, + "learning_rate": 7.724656731244062e-06, + "loss": 0.6082, "step": 4750 }, { - "epoch": 0.5, - "grad_norm": 3.066214123433812, - "learning_rate": 5.244460678533349e-06, - "loss": 0.6338, + "epoch": 0.34, + "grad_norm": 1.525867161260493, + "learning_rate": 7.723693105005729e-06, + "loss": 0.5019, "step": 4751 }, { - "epoch": 0.5, - "grad_norm": 3.7234112282653116, - "learning_rate": 5.24275846434907e-06, - "loss": 0.6476, + "epoch": 0.34, + "grad_norm": 1.5798112931773591, + "learning_rate": 7.722729334892696e-06, + "loss": 0.5348, "step": 4752 }, { - "epoch": 0.5, - "grad_norm": 2.5886795596777463, - "learning_rate": 5.2410562219618135e-06, - "loss": 0.652, + "epoch": 0.34, + "grad_norm": 1.9361459170354864, + "learning_rate": 7.721765420955873e-06, + "loss": 0.6235, "step": 4753 }, { - "epoch": 0.5, - "grad_norm": 2.5480441407074443, - "learning_rate": 5.239353951569342e-06, - "loss": 0.5687, + "epoch": 0.34, + "grad_norm": 0.8360129935342129, + "learning_rate": 7.72080136324618e-06, + "loss": 0.4485, "step": 4754 }, { - "epoch": 0.5, - "grad_norm": 2.4463031639209403, - "learning_rate": 5.2376516533694196e-06, - "loss": 0.5338, + "epoch": 0.34, + "grad_norm": 2.6276472752364644, + "learning_rate": 7.719837161814538e-06, + "loss": 0.6187, "step": 4755 }, { - "epoch": 0.5, - "grad_norm": 2.257125530689949, - "learning_rate": 5.235949327559817e-06, - "loss": 0.6191, + "epoch": 0.34, + "grad_norm": 1.5867184818199906, + "learning_rate": 7.718872816711882e-06, + "loss": 0.5434, "step": 4756 }, { - "epoch": 0.5, - "grad_norm": 2.1011431880055143, - "learning_rate": 5.2342469743383026e-06, - "loss": 0.6559, + "epoch": 0.34, + "grad_norm": 1.3960992371657015, + "learning_rate": 7.717908327989151e-06, + "loss": 0.5091, "step": 4757 }, { - "epoch": 0.5, - "grad_norm": 3.487907737324848, - "learning_rate": 5.232544593902652e-06, - "loss": 0.6932, + "epoch": 0.34, + "grad_norm": 1.73568491724809, + "learning_rate": 7.716943695697292e-06, + "loss": 0.6053, "step": 4758 }, { - "epoch": 0.5, - "grad_norm": 2.402578163767159, - "learning_rate": 5.230842186450642e-06, - "loss": 0.6343, + "epoch": 0.34, + "grad_norm": 1.4695473747845849, + "learning_rate": 7.715978919887261e-06, + "loss": 0.5457, "step": 4759 }, { - "epoch": 0.5, - "grad_norm": 3.077478548938603, - "learning_rate": 5.2291397521800545e-06, - "loss": 0.6685, + "epoch": 0.34, + "grad_norm": 1.9886523567517422, + "learning_rate": 7.71501400061002e-06, + "loss": 0.5776, "step": 4760 }, { - "epoch": 0.5, - "grad_norm": 2.2841356416265044, - "learning_rate": 5.227437291288674e-06, - "loss": 0.6646, + "epoch": 0.34, + "grad_norm": 1.6325446847520872, + "learning_rate": 7.71404893791654e-06, + "loss": 0.5468, "step": 4761 }, { - "epoch": 0.5, - "grad_norm": 2.7800487949493458, - "learning_rate": 5.225734803974285e-06, - "loss": 0.6371, + "epoch": 0.34, + "grad_norm": 2.0336008845849003, + "learning_rate": 7.7130837318578e-06, + "loss": 0.5756, "step": 4762 }, { - "epoch": 0.5, - "grad_norm": 2.2976515988461337, - "learning_rate": 5.22403229043468e-06, - "loss": 0.6387, + "epoch": 0.34, + "grad_norm": 0.7098736558301314, + "learning_rate": 7.712118382484783e-06, + "loss": 0.4638, "step": 4763 }, { - "epoch": 0.5, - "grad_norm": 2.6430943743263358, - "learning_rate": 5.222329750867649e-06, - "loss": 0.6613, + "epoch": 0.34, + "grad_norm": 6.20426650736749, + "learning_rate": 7.711152889848483e-06, + "loss": 0.5422, "step": 4764 }, { - "epoch": 0.5, - "grad_norm": 2.659848728533461, - "learning_rate": 5.220627185470993e-06, - "loss": 0.614, + "epoch": 0.34, + "grad_norm": 0.7449859830134952, + "learning_rate": 7.710187253999901e-06, + "loss": 0.4208, "step": 4765 }, { - "epoch": 0.5, - "grad_norm": 3.5083748901897884, - "learning_rate": 5.218924594442507e-06, - "loss": 0.5716, + "epoch": 0.34, + "grad_norm": 1.4314056198982683, + "learning_rate": 7.709221474990044e-06, + "loss": 0.5646, "step": 4766 }, { - "epoch": 0.5, - "grad_norm": 2.8298517158269325, - "learning_rate": 5.217221977979996e-06, - "loss": 0.6882, + "epoch": 0.34, + "grad_norm": 1.5274650194258994, + "learning_rate": 7.70825555286993e-06, + "loss": 0.6155, "step": 4767 }, { - "epoch": 0.5, - "grad_norm": 3.7161705013278534, - "learning_rate": 5.215519336281261e-06, - "loss": 0.7084, + "epoch": 0.34, + "grad_norm": 1.647708428949298, + "learning_rate": 7.70728948769058e-06, + "loss": 0.5346, "step": 4768 }, { - "epoch": 0.5, - "grad_norm": 2.4592185775923303, - "learning_rate": 5.213816669544114e-06, - "loss": 0.5649, + "epoch": 0.34, + "grad_norm": 1.6189750613850027, + "learning_rate": 7.706323279503025e-06, + "loss": 0.52, "step": 4769 }, { - "epoch": 0.5, - "grad_norm": 2.7485436454433247, - "learning_rate": 5.2121139779663645e-06, - "loss": 0.6664, + "epoch": 0.34, + "grad_norm": 1.9922713262163763, + "learning_rate": 7.705356928358306e-06, + "loss": 0.4895, "step": 4770 }, { - "epoch": 0.5, - "grad_norm": 3.0981595313863677, - "learning_rate": 5.2104112617458254e-06, - "loss": 0.6766, + "epoch": 0.34, + "grad_norm": 1.4977263557355764, + "learning_rate": 7.704390434307465e-06, + "loss": 0.5349, "step": 4771 }, { - "epoch": 0.5, - "grad_norm": 4.0052501193362255, - "learning_rate": 5.2087085210803145e-06, - "loss": 0.7007, + "epoch": 0.34, + "grad_norm": 2.5665371378959403, + "learning_rate": 7.70342379740156e-06, + "loss": 0.5721, "step": 4772 }, { - "epoch": 0.5, - "grad_norm": 3.1047565880215737, - "learning_rate": 5.207005756167651e-06, - "loss": 0.6448, + "epoch": 0.34, + "grad_norm": 1.5981697390000285, + "learning_rate": 7.702457017691647e-06, + "loss": 0.5859, "step": 4773 }, { - "epoch": 0.5, - "grad_norm": 2.792375318373496, - "learning_rate": 5.205302967205657e-06, - "loss": 0.6724, + "epoch": 0.34, + "grad_norm": 1.6746877287153015, + "learning_rate": 7.701490095228799e-06, + "loss": 0.5724, "step": 4774 }, { - "epoch": 0.5, - "grad_norm": 2.51567986610389, - "learning_rate": 5.203600154392158e-06, - "loss": 0.606, + "epoch": 0.34, + "grad_norm": 1.686120282672347, + "learning_rate": 7.70052303006409e-06, + "loss": 0.5677, "step": 4775 }, { - "epoch": 0.5, - "grad_norm": 2.5125662323316287, - "learning_rate": 5.2018973179249824e-06, - "loss": 0.6156, + "epoch": 0.34, + "grad_norm": 1.8140008716657556, + "learning_rate": 7.6995558222486e-06, + "loss": 0.5822, "step": 4776 }, { - "epoch": 0.5, - "grad_norm": 2.2159857718006224, - "learning_rate": 5.200194458001958e-06, - "loss": 0.5654, + "epoch": 0.34, + "grad_norm": 1.6893474044655168, + "learning_rate": 7.698588471833428e-06, + "loss": 0.5697, "step": 4777 }, { - "epoch": 0.5, - "grad_norm": 2.9452532347483427, - "learning_rate": 5.198491574820923e-06, - "loss": 0.603, + "epoch": 0.34, + "grad_norm": 1.734751217595584, + "learning_rate": 7.69762097886967e-06, + "loss": 0.5778, "step": 4778 }, { - "epoch": 0.5, - "grad_norm": 6.142355113115631, - "learning_rate": 5.196788668579708e-06, - "loss": 0.6565, + "epoch": 0.34, + "grad_norm": 1.436888762588941, + "learning_rate": 7.69665334340843e-06, + "loss": 0.5325, "step": 4779 }, { - "epoch": 0.5, - "grad_norm": 2.2887002234975764, - "learning_rate": 5.195085739476156e-06, - "loss": 0.6851, + "epoch": 0.34, + "grad_norm": 1.7350723068283707, + "learning_rate": 7.695685565500823e-06, + "loss": 0.5576, "step": 4780 }, { - "epoch": 0.5, - "grad_norm": 3.085541461688833, - "learning_rate": 5.193382787708106e-06, - "loss": 0.5886, + "epoch": 0.34, + "grad_norm": 1.7864406869086606, + "learning_rate": 7.694717645197968e-06, + "loss": 0.565, "step": 4781 }, { - "epoch": 0.5, - "grad_norm": 7.500928456155343, - "learning_rate": 5.191679813473402e-06, - "loss": 0.5474, + "epoch": 0.34, + "grad_norm": 1.6261504304099974, + "learning_rate": 7.693749582550999e-06, + "loss": 0.5156, "step": 4782 }, { - "epoch": 0.5, - "grad_norm": 6.985875727924238, - "learning_rate": 5.189976816969892e-06, - "loss": 0.6326, + "epoch": 0.34, + "grad_norm": 1.62301017831468, + "learning_rate": 7.692781377611047e-06, + "loss": 0.558, "step": 4783 }, { - "epoch": 0.5, - "grad_norm": 3.1970563840062383, - "learning_rate": 5.188273798395425e-06, - "loss": 0.7014, + "epoch": 0.34, + "grad_norm": 0.7381646704606706, + "learning_rate": 7.691813030429261e-06, + "loss": 0.4576, "step": 4784 }, { - "epoch": 0.5, - "grad_norm": 3.241455526944972, - "learning_rate": 5.186570757947852e-06, - "loss": 0.731, + "epoch": 0.34, + "grad_norm": 1.5586049965399558, + "learning_rate": 7.690844541056787e-06, + "loss": 0.5084, "step": 4785 }, { - "epoch": 0.5, - "grad_norm": 3.576374543674953, - "learning_rate": 5.1848676958250265e-06, - "loss": 0.6363, + "epoch": 0.34, + "grad_norm": 2.083249706596281, + "learning_rate": 7.689875909544787e-06, + "loss": 0.5892, "step": 4786 }, { - "epoch": 0.5, - "grad_norm": 2.335548183754556, - "learning_rate": 5.183164612224809e-06, - "loss": 0.583, + "epoch": 0.34, + "grad_norm": 1.4237902770939703, + "learning_rate": 7.688907135944429e-06, + "loss": 0.5123, "step": 4787 }, { - "epoch": 0.5, - "grad_norm": 2.4154405135354136, - "learning_rate": 5.181461507345054e-06, - "loss": 0.5799, + "epoch": 0.34, + "grad_norm": 1.5416031283184448, + "learning_rate": 7.687938220306883e-06, + "loss": 0.4911, "step": 4788 }, { - "epoch": 0.5, - "grad_norm": 3.5423104288104916, - "learning_rate": 5.1797583813836285e-06, - "loss": 0.6326, + "epoch": 0.34, + "grad_norm": 2.2552860452664545, + "learning_rate": 7.686969162683331e-06, + "loss": 0.5714, "step": 4789 }, { - "epoch": 0.5, - "grad_norm": 2.5045963993199747, - "learning_rate": 5.178055234538391e-06, - "loss": 0.6283, + "epoch": 0.34, + "grad_norm": 1.7237894212475746, + "learning_rate": 7.685999963124965e-06, + "loss": 0.6169, "step": 4790 }, { - "epoch": 0.5, - "grad_norm": 2.0244708876329987, - "learning_rate": 5.176352067007213e-06, - "loss": 0.6263, + "epoch": 0.34, + "grad_norm": 1.5028336770741848, + "learning_rate": 7.685030621682978e-06, + "loss": 0.4715, "step": 4791 }, { - "epoch": 0.5, - "grad_norm": 2.255397053263638, - "learning_rate": 5.174648878987959e-06, - "loss": 0.6359, + "epoch": 0.34, + "grad_norm": 1.4007546398825015, + "learning_rate": 7.684061138408577e-06, + "loss": 0.5168, "step": 4792 }, { - "epoch": 0.5, - "grad_norm": 3.722706053854962, - "learning_rate": 5.1729456706785055e-06, - "loss": 0.6435, + "epoch": 0.34, + "grad_norm": 1.767005635024293, + "learning_rate": 7.683091513352972e-06, + "loss": 0.5599, "step": 4793 }, { - "epoch": 0.5, - "grad_norm": 2.075448749546548, - "learning_rate": 5.1712424422767224e-06, - "loss": 0.6811, + "epoch": 0.34, + "grad_norm": 1.4711843271767562, + "learning_rate": 7.68212174656738e-06, + "loss": 0.5376, "step": 4794 }, { - "epoch": 0.5, - "grad_norm": 2.688883878063154, - "learning_rate": 5.169539193980489e-06, - "loss": 0.6266, + "epoch": 0.34, + "grad_norm": 1.5719253652355647, + "learning_rate": 7.68115183810303e-06, + "loss": 0.5822, "step": 4795 }, { - "epoch": 0.5, - "grad_norm": 2.9204150200499615, - "learning_rate": 5.1678359259876824e-06, - "loss": 0.6795, + "epoch": 0.34, + "grad_norm": 1.769427713431006, + "learning_rate": 7.680181788011156e-06, + "loss": 0.6491, "step": 4796 }, { - "epoch": 0.5, - "grad_norm": 1.1559522338055792, - "learning_rate": 5.1661326384961805e-06, - "loss": 0.5607, + "epoch": 0.34, + "grad_norm": 1.5718728251179117, + "learning_rate": 7.679211596342997e-06, + "loss": 0.5041, "step": 4797 }, { - "epoch": 0.5, - "grad_norm": 1.9541032407056105, - "learning_rate": 5.164429331703871e-06, - "loss": 0.6785, + "epoch": 0.34, + "grad_norm": 2.2696226753782676, + "learning_rate": 7.678241263149802e-06, + "loss": 0.5169, "step": 4798 }, { - "epoch": 0.5, - "grad_norm": 0.8942781057490377, - "learning_rate": 5.162726005808636e-06, - "loss": 0.5714, + "epoch": 0.34, + "grad_norm": 1.572332338164595, + "learning_rate": 7.677270788482831e-06, + "loss": 0.5247, "step": 4799 }, { - "epoch": 0.51, - "grad_norm": 1.079312812042359, - "learning_rate": 5.1610226610083655e-06, - "loss": 0.5694, + "epoch": 0.34, + "grad_norm": 1.7618022883940945, + "learning_rate": 7.676300172393344e-06, + "loss": 0.596, "step": 4800 }, { - "epoch": 0.51, - "grad_norm": 3.277923548372197, - "learning_rate": 5.159319297500945e-06, - "loss": 0.6173, + "epoch": 0.34, + "grad_norm": 1.8392300248332514, + "learning_rate": 7.675329414932613e-06, + "loss": 0.5715, "step": 4801 }, { - "epoch": 0.51, - "grad_norm": 2.986097736979872, - "learning_rate": 5.157615915484273e-06, - "loss": 0.6071, + "epoch": 0.34, + "grad_norm": 1.6656413306655593, + "learning_rate": 7.674358516151918e-06, + "loss": 0.5455, "step": 4802 }, { - "epoch": 0.51, - "grad_norm": 3.2600607771581642, - "learning_rate": 5.155912515156236e-06, - "loss": 0.6338, + "epoch": 0.34, + "grad_norm": 1.5292499250991674, + "learning_rate": 7.673387476102543e-06, + "loss": 0.535, "step": 4803 }, { - "epoch": 0.51, - "grad_norm": 4.444448387829765, - "learning_rate": 5.154209096714736e-06, - "loss": 0.635, + "epoch": 0.34, + "grad_norm": 1.594726544482017, + "learning_rate": 7.672416294835784e-06, + "loss": 0.6505, "step": 4804 }, { - "epoch": 0.51, - "grad_norm": 2.2143014150947318, - "learning_rate": 5.152505660357667e-06, - "loss": 0.6542, + "epoch": 0.34, + "grad_norm": 0.8593751249336438, + "learning_rate": 7.671444972402939e-06, + "loss": 0.4607, "step": 4805 }, { - "epoch": 0.51, - "grad_norm": 2.685218090583335, - "learning_rate": 5.150802206282932e-06, - "loss": 0.5947, + "epoch": 0.34, + "grad_norm": 2.1173938895811286, + "learning_rate": 7.670473508855321e-06, + "loss": 0.563, "step": 4806 }, { - "epoch": 0.51, - "grad_norm": 2.593209127978214, - "learning_rate": 5.149098734688434e-06, - "loss": 0.6186, + "epoch": 0.34, + "grad_norm": 1.7433131810540359, + "learning_rate": 7.669501904244242e-06, + "loss": 0.496, "step": 4807 }, { - "epoch": 0.51, - "grad_norm": 4.2086494702555886, - "learning_rate": 5.147395245772074e-06, - "loss": 0.737, + "epoch": 0.34, + "grad_norm": 1.483450925235316, + "learning_rate": 7.668530158621026e-06, + "loss": 0.5235, "step": 4808 }, { - "epoch": 0.51, - "grad_norm": 10.188890598572003, - "learning_rate": 5.145691739731761e-06, - "loss": 0.6247, + "epoch": 0.34, + "grad_norm": 1.6297819618183846, + "learning_rate": 7.667558272037006e-06, + "loss": 0.5459, "step": 4809 }, { - "epoch": 0.51, - "grad_norm": 9.44137252928556, - "learning_rate": 5.143988216765402e-06, - "loss": 0.6595, + "epoch": 0.34, + "grad_norm": 1.5683364087194016, + "learning_rate": 7.666586244543518e-06, + "loss": 0.5795, "step": 4810 }, { - "epoch": 0.51, - "grad_norm": 3.172419643746679, - "learning_rate": 5.142284677070911e-06, - "loss": 0.6992, + "epoch": 0.34, + "grad_norm": 1.85860885831951, + "learning_rate": 7.665614076191911e-06, + "loss": 0.513, "step": 4811 }, { - "epoch": 0.51, - "grad_norm": 2.952102588392533, - "learning_rate": 5.140581120846194e-06, - "loss": 0.6163, + "epoch": 0.34, + "grad_norm": 1.7199229605812114, + "learning_rate": 7.664641767033536e-06, + "loss": 0.5455, "step": 4812 }, { - "epoch": 0.51, - "grad_norm": 2.6735180292043665, - "learning_rate": 5.138877548289173e-06, - "loss": 0.6745, + "epoch": 0.34, + "grad_norm": 1.5205261425243821, + "learning_rate": 7.663669317119751e-06, + "loss": 0.5078, "step": 4813 }, { - "epoch": 0.51, - "grad_norm": 3.3271844953089964, - "learning_rate": 5.137173959597755e-06, - "loss": 0.7083, + "epoch": 0.34, + "grad_norm": 58.834792463460445, + "learning_rate": 7.66269672650193e-06, + "loss": 0.5082, "step": 4814 }, { - "epoch": 0.51, - "grad_norm": 5.779885373526377, - "learning_rate": 5.135470354969867e-06, - "loss": 0.5758, + "epoch": 0.34, + "grad_norm": 1.5668538249849029, + "learning_rate": 7.661723995231446e-06, + "loss": 0.4899, "step": 4815 }, { - "epoch": 0.51, - "grad_norm": 3.880291332502046, - "learning_rate": 5.1337667346034226e-06, - "loss": 0.6426, + "epoch": 0.34, + "grad_norm": 1.887973846532742, + "learning_rate": 7.660751123359678e-06, + "loss": 0.6257, "step": 4816 }, { - "epoch": 0.51, - "grad_norm": 2.789682189366147, - "learning_rate": 5.132063098696346e-06, - "loss": 0.6936, + "epoch": 0.34, + "grad_norm": 1.5567215291808731, + "learning_rate": 7.659778110938022e-06, + "loss": 0.5737, "step": 4817 }, { - "epoch": 0.51, - "grad_norm": 2.5838279711205714, - "learning_rate": 5.130359447446561e-06, - "loss": 0.6659, + "epoch": 0.34, + "grad_norm": 1.517713406565091, + "learning_rate": 7.658804958017873e-06, + "loss": 0.5096, "step": 4818 }, { - "epoch": 0.51, - "grad_norm": 3.4258511041007518, - "learning_rate": 5.128655781051991e-06, - "loss": 0.6169, + "epoch": 0.34, + "grad_norm": 2.058621110147777, + "learning_rate": 7.657831664650638e-06, + "loss": 0.5615, "step": 4819 }, { - "epoch": 0.51, - "grad_norm": 3.6136439580060657, - "learning_rate": 5.126952099710566e-06, - "loss": 0.6806, + "epoch": 0.34, + "grad_norm": 1.5959400392090728, + "learning_rate": 7.656858230887727e-06, + "loss": 0.5266, "step": 4820 }, { - "epoch": 0.51, - "grad_norm": 3.209126945224101, - "learning_rate": 5.125248403620211e-06, - "loss": 0.6683, + "epoch": 0.34, + "grad_norm": 1.5666059401846921, + "learning_rate": 7.65588465678056e-06, + "loss": 0.566, "step": 4821 }, { - "epoch": 0.51, - "grad_norm": 2.809317990932967, - "learning_rate": 5.12354469297886e-06, - "loss": 0.6826, + "epoch": 0.34, + "grad_norm": 2.2528006175698367, + "learning_rate": 7.65491094238057e-06, + "loss": 0.5403, "step": 4822 }, { - "epoch": 0.51, - "grad_norm": 3.2384938127477425, - "learning_rate": 5.121840967984443e-06, - "loss": 0.6434, + "epoch": 0.34, + "grad_norm": 1.6886750127941925, + "learning_rate": 7.653937087739187e-06, + "loss": 0.5452, "step": 4823 }, { - "epoch": 0.51, - "grad_norm": 2.2368138450617656, - "learning_rate": 5.120137228834896e-06, - "loss": 0.6217, + "epoch": 0.34, + "grad_norm": 0.8569873644034576, + "learning_rate": 7.652963092907854e-06, + "loss": 0.4566, "step": 4824 }, { - "epoch": 0.51, - "grad_norm": 2.452259843593572, - "learning_rate": 5.1184334757281506e-06, - "loss": 0.6101, + "epoch": 0.34, + "grad_norm": 0.7780920924938105, + "learning_rate": 7.65198895793802e-06, + "loss": 0.4337, "step": 4825 }, { - "epoch": 0.51, - "grad_norm": 2.585727076856822, - "learning_rate": 5.1167297088621485e-06, - "loss": 0.6537, + "epoch": 0.34, + "grad_norm": 1.589020997619751, + "learning_rate": 7.651014682881142e-06, + "loss": 0.5182, "step": 4826 }, { - "epoch": 0.51, - "grad_norm": 4.454616726505247, - "learning_rate": 5.1150259284348246e-06, - "loss": 0.571, + "epoch": 0.34, + "grad_norm": 1.50766543556978, + "learning_rate": 7.650040267788688e-06, + "loss": 0.5189, "step": 4827 }, { - "epoch": 0.51, - "grad_norm": 4.132538433226866, - "learning_rate": 5.113322134644122e-06, - "loss": 0.6571, + "epoch": 0.34, + "grad_norm": 1.483784246253127, + "learning_rate": 7.649065712712126e-06, + "loss": 0.5413, "step": 4828 }, { - "epoch": 0.51, - "grad_norm": 2.143250401873857, - "learning_rate": 5.111618327687981e-06, - "loss": 0.5883, + "epoch": 0.34, + "grad_norm": 1.649200571717978, + "learning_rate": 7.648091017702937e-06, + "loss": 0.5977, "step": 4829 }, { - "epoch": 0.51, - "grad_norm": 6.356716637447877, - "learning_rate": 5.109914507764345e-06, - "loss": 0.6736, + "epoch": 0.34, + "grad_norm": 1.525791580060235, + "learning_rate": 7.64711618281261e-06, + "loss": 0.552, "step": 4830 }, { - "epoch": 0.51, - "grad_norm": 2.7253830188411547, - "learning_rate": 5.108210675071159e-06, - "loss": 0.6881, + "epoch": 0.34, + "grad_norm": 1.491987629365099, + "learning_rate": 7.646141208092634e-06, + "loss": 0.5404, "step": 4831 }, { - "epoch": 0.51, - "grad_norm": 3.4516235016563788, - "learning_rate": 5.1065068298063705e-06, - "loss": 0.6512, + "epoch": 0.34, + "grad_norm": 1.567980871378964, + "learning_rate": 7.645166093594513e-06, + "loss": 0.4947, "step": 4832 }, { - "epoch": 0.51, - "grad_norm": 2.7073002106917237, - "learning_rate": 5.104802972167926e-06, - "loss": 0.6957, + "epoch": 0.34, + "grad_norm": 0.793874034007292, + "learning_rate": 7.644190839369757e-06, + "loss": 0.4641, "step": 4833 }, { - "epoch": 0.51, - "grad_norm": 2.7841704207983984, - "learning_rate": 5.103099102353775e-06, - "loss": 0.5775, + "epoch": 0.34, + "grad_norm": 1.6415951114145668, + "learning_rate": 7.643215445469878e-06, + "loss": 0.5859, "step": 4834 }, { - "epoch": 0.51, - "grad_norm": 7.9292838099679965, - "learning_rate": 5.101395220561869e-06, - "loss": 0.7005, + "epoch": 0.34, + "grad_norm": 1.7324865016573279, + "learning_rate": 7.642239911946406e-06, + "loss": 0.571, "step": 4835 }, { - "epoch": 0.51, - "grad_norm": 2.7773812390741077, - "learning_rate": 5.099691326990158e-06, - "loss": 0.5243, + "epoch": 0.34, + "grad_norm": 2.324937403560273, + "learning_rate": 7.641264238850868e-06, + "loss": 0.5585, "step": 4836 }, { - "epoch": 0.51, - "grad_norm": 3.26055135706108, - "learning_rate": 5.0979874218365985e-06, - "loss": 0.6554, + "epoch": 0.34, + "grad_norm": 1.9538347556298559, + "learning_rate": 7.640288426234803e-06, + "loss": 0.6562, "step": 4837 }, { - "epoch": 0.51, - "grad_norm": 2.93701844263028, - "learning_rate": 5.096283505299142e-06, - "loss": 0.5827, + "epoch": 0.34, + "grad_norm": 1.5214735419090046, + "learning_rate": 7.639312474149756e-06, + "loss": 0.5171, "step": 4838 }, { - "epoch": 0.51, - "grad_norm": 2.661195675756214, - "learning_rate": 5.094579577575748e-06, - "loss": 0.7464, + "epoch": 0.34, + "grad_norm": 1.4970476526237704, + "learning_rate": 7.638336382647284e-06, + "loss": 0.5608, "step": 4839 }, { - "epoch": 0.51, - "grad_norm": 4.4434201960292805, - "learning_rate": 5.09287563886437e-06, - "loss": 0.6926, + "epoch": 0.34, + "grad_norm": 1.7210879157361947, + "learning_rate": 7.63736015177894e-06, + "loss": 0.5276, "step": 4840 }, { - "epoch": 0.51, - "grad_norm": 2.8619375312083424, - "learning_rate": 5.0911716893629695e-06, - "loss": 0.7326, + "epoch": 0.34, + "grad_norm": 1.531741214028701, + "learning_rate": 7.6363837815963e-06, + "loss": 0.5937, "step": 4841 }, { - "epoch": 0.51, - "grad_norm": 4.179918987044076, - "learning_rate": 5.089467729269506e-06, - "loss": 0.6202, + "epoch": 0.34, + "grad_norm": 0.8131576492914907, + "learning_rate": 7.635407272150933e-06, + "loss": 0.4687, "step": 4842 }, { - "epoch": 0.51, - "grad_norm": 2.3674261810438875, - "learning_rate": 5.087763758781941e-06, - "loss": 0.5401, + "epoch": 0.34, + "grad_norm": 1.6860656314710765, + "learning_rate": 7.634430623494425e-06, + "loss": 0.5734, "step": 4843 }, { - "epoch": 0.51, - "grad_norm": 2.954478033527312, - "learning_rate": 5.0860597780982345e-06, - "loss": 0.6457, + "epoch": 0.34, + "grad_norm": 1.6700986817823413, + "learning_rate": 7.633453835678364e-06, + "loss": 0.5316, "step": 4844 }, { - "epoch": 0.51, - "grad_norm": 6.1055773678841705, - "learning_rate": 5.084355787416352e-06, - "loss": 0.7077, + "epoch": 0.34, + "grad_norm": 1.7615374044550225, + "learning_rate": 7.632476908754347e-06, + "loss": 0.5724, "step": 4845 }, { - "epoch": 0.51, - "grad_norm": 3.104870089140017, - "learning_rate": 5.08265178693426e-06, - "loss": 0.5571, + "epoch": 0.34, + "grad_norm": 2.204073366286373, + "learning_rate": 7.631499842773981e-06, + "loss": 0.5371, "step": 4846 }, { - "epoch": 0.51, - "grad_norm": 3.282087698903521, - "learning_rate": 5.08094777684992e-06, - "loss": 0.7171, + "epoch": 0.34, + "grad_norm": 1.8241263858546308, + "learning_rate": 7.630522637788878e-06, + "loss": 0.6629, "step": 4847 }, { - "epoch": 0.51, - "grad_norm": 5.212353373051497, - "learning_rate": 5.079243757361304e-06, - "loss": 0.6634, + "epoch": 0.34, + "grad_norm": 1.5406400587178273, + "learning_rate": 7.629545293850653e-06, + "loss": 0.6116, "step": 4848 }, { - "epoch": 0.51, - "grad_norm": 3.151447480509918, - "learning_rate": 5.077539728666374e-06, - "loss": 0.6715, + "epoch": 0.34, + "grad_norm": 1.8682256651511238, + "learning_rate": 7.628567811010937e-06, + "loss": 0.6185, "step": 4849 }, { - "epoch": 0.51, - "grad_norm": 2.7192044541711002, - "learning_rate": 5.0758356909631055e-06, - "loss": 0.6798, + "epoch": 0.34, + "grad_norm": 1.6584404498472998, + "learning_rate": 7.627590189321363e-06, + "loss": 0.5586, "step": 4850 }, { - "epoch": 0.51, - "grad_norm": 2.7229617539344644, - "learning_rate": 5.074131644449462e-06, - "loss": 0.6132, + "epoch": 0.34, + "grad_norm": 0.8449224191246342, + "learning_rate": 7.626612428833571e-06, + "loss": 0.4777, "step": 4851 }, { - "epoch": 0.51, - "grad_norm": 6.5465212384001665, - "learning_rate": 5.072427589323422e-06, - "loss": 0.6457, + "epoch": 0.34, + "grad_norm": 1.830188346864979, + "learning_rate": 7.625634529599211e-06, + "loss": 0.5255, "step": 4852 }, { - "epoch": 0.51, - "grad_norm": 2.539686113044392, - "learning_rate": 5.0707235257829525e-06, - "loss": 0.6064, + "epoch": 0.34, + "grad_norm": 1.7914593141053055, + "learning_rate": 7.624656491669937e-06, + "loss": 0.5576, "step": 4853 }, { - "epoch": 0.51, - "grad_norm": 2.8359956675113813, - "learning_rate": 5.069019454026028e-06, - "loss": 0.6889, + "epoch": 0.34, + "grad_norm": 1.792578111815124, + "learning_rate": 7.6236783150974145e-06, + "loss": 0.5895, "step": 4854 }, { - "epoch": 0.51, - "grad_norm": 3.075569786140454, - "learning_rate": 5.067315374250623e-06, - "loss": 0.6786, + "epoch": 0.34, + "grad_norm": 1.518646903916901, + "learning_rate": 7.622699999933313e-06, + "loss": 0.5675, "step": 4855 }, { - "epoch": 0.51, - "grad_norm": 2.936345810031271, - "learning_rate": 5.065611286654712e-06, - "loss": 0.6724, + "epoch": 0.34, + "grad_norm": 1.8836355845883688, + "learning_rate": 7.621721546229311e-06, + "loss": 0.5751, "step": 4856 }, { - "epoch": 0.51, - "grad_norm": 2.7223753328820366, - "learning_rate": 5.063907191436274e-06, - "loss": 0.6038, + "epoch": 0.34, + "grad_norm": 0.756698389515884, + "learning_rate": 7.620742954037091e-06, + "loss": 0.4206, "step": 4857 }, { - "epoch": 0.51, - "grad_norm": 2.747137280849216, - "learning_rate": 5.062203088793279e-06, - "loss": 0.6451, + "epoch": 0.34, + "grad_norm": 1.7048508117577985, + "learning_rate": 7.61976422340835e-06, + "loss": 0.6111, "step": 4858 }, { - "epoch": 0.51, - "grad_norm": 2.534090883953568, - "learning_rate": 5.060498978923713e-06, - "loss": 0.7221, + "epoch": 0.34, + "grad_norm": 1.5146159304395557, + "learning_rate": 7.618785354394786e-06, + "loss": 0.5126, "step": 4859 }, { - "epoch": 0.51, - "grad_norm": 2.8222282010733877, - "learning_rate": 5.058794862025548e-06, - "loss": 0.6084, + "epoch": 0.34, + "grad_norm": 1.6081244336897265, + "learning_rate": 7.617806347048104e-06, + "loss": 0.5521, "step": 4860 }, { - "epoch": 0.51, - "grad_norm": 3.0443913355409364, - "learning_rate": 5.057090738296767e-06, - "loss": 0.6554, + "epoch": 0.34, + "grad_norm": 1.6143377344271912, + "learning_rate": 7.616827201420023e-06, + "loss": 0.6049, "step": 4861 }, { - "epoch": 0.51, - "grad_norm": 3.2515144608074653, - "learning_rate": 5.055386607935347e-06, - "loss": 0.5725, + "epoch": 0.35, + "grad_norm": 1.653392999902985, + "learning_rate": 7.6158479175622605e-06, + "loss": 0.4772, "step": 4862 }, { - "epoch": 0.51, - "grad_norm": 1.1037680116930089, - "learning_rate": 5.053682471139275e-06, - "loss": 0.5484, + "epoch": 0.35, + "grad_norm": 1.575509215885776, + "learning_rate": 7.614868495526547e-06, + "loss": 0.5316, "step": 4863 }, { - "epoch": 0.51, - "grad_norm": 3.5575549338006436, - "learning_rate": 5.051978328106525e-06, - "loss": 0.6632, + "epoch": 0.35, + "grad_norm": 3.1368802533512565, + "learning_rate": 7.613888935364619e-06, + "loss": 0.5572, "step": 4864 }, { - "epoch": 0.51, - "grad_norm": 2.962207242548026, - "learning_rate": 5.050274179035084e-06, - "loss": 0.6792, + "epoch": 0.35, + "grad_norm": 1.5620875146107172, + "learning_rate": 7.612909237128223e-06, + "loss": 0.5678, "step": 4865 }, { - "epoch": 0.51, - "grad_norm": 3.5877705132080306, - "learning_rate": 5.048570024122935e-06, - "loss": 0.6326, + "epoch": 0.35, + "grad_norm": 1.5828299831330783, + "learning_rate": 7.611929400869105e-06, + "loss": 0.5209, "step": 4866 }, { - "epoch": 0.51, - "grad_norm": 6.322301722453874, - "learning_rate": 5.046865863568061e-06, - "loss": 0.7552, + "epoch": 0.35, + "grad_norm": 1.5809980425438976, + "learning_rate": 7.610949426639027e-06, + "loss": 0.5669, "step": 4867 }, { - "epoch": 0.51, - "grad_norm": 6.682490269010231, - "learning_rate": 5.045161697568446e-06, - "loss": 0.6046, + "epoch": 0.35, + "grad_norm": 1.5766604115891725, + "learning_rate": 7.609969314489751e-06, + "loss": 0.4951, "step": 4868 }, { - "epoch": 0.51, - "grad_norm": 3.7731626743807523, - "learning_rate": 5.0434575263220745e-06, - "loss": 0.6787, + "epoch": 0.35, + "grad_norm": 1.5849843379862447, + "learning_rate": 7.608989064473056e-06, + "loss": 0.5668, "step": 4869 }, { - "epoch": 0.51, - "grad_norm": 2.898042881785744, - "learning_rate": 5.041753350026936e-06, - "loss": 0.5913, + "epoch": 0.35, + "grad_norm": 1.6476264245736973, + "learning_rate": 7.608008676640715e-06, + "loss": 0.4885, "step": 4870 }, { - "epoch": 0.51, - "grad_norm": 5.927842102069006, - "learning_rate": 5.0400491688810105e-06, - "loss": 0.6178, + "epoch": 0.35, + "grad_norm": 1.690886090642633, + "learning_rate": 7.607028151044518e-06, + "loss": 0.4808, "step": 4871 }, { - "epoch": 0.51, - "grad_norm": 1.1051954463354348, - "learning_rate": 5.038344983082292e-06, - "loss": 0.5853, + "epoch": 0.35, + "grad_norm": 1.4647931121100588, + "learning_rate": 7.606047487736261e-06, + "loss": 0.5362, "step": 4872 }, { - "epoch": 0.51, - "grad_norm": 2.71794682870089, - "learning_rate": 5.036640792828761e-06, - "loss": 0.6183, + "epoch": 0.35, + "grad_norm": 1.6230576692987182, + "learning_rate": 7.6050666867677455e-06, + "loss": 0.5413, "step": 4873 }, { - "epoch": 0.51, - "grad_norm": 3.746461374115521, - "learning_rate": 5.0349365983184105e-06, - "loss": 0.6803, + "epoch": 0.35, + "grad_norm": 1.5127299884626233, + "learning_rate": 7.604085748190782e-06, + "loss": 0.6063, "step": 4874 }, { - "epoch": 0.51, - "grad_norm": 2.2340308167373597, - "learning_rate": 5.033232399749226e-06, - "loss": 0.6801, + "epoch": 0.35, + "grad_norm": 2.21297096000894, + "learning_rate": 7.6031046720571825e-06, + "loss": 0.6045, "step": 4875 }, { - "epoch": 0.51, - "grad_norm": 2.9189306232219194, - "learning_rate": 5.031528197319197e-06, - "loss": 0.6212, + "epoch": 0.35, + "grad_norm": 0.7899458230224787, + "learning_rate": 7.602123458418775e-06, + "loss": 0.41, "step": 4876 }, { - "epoch": 0.51, - "grad_norm": 2.3120947610822853, - "learning_rate": 5.0298239912263145e-06, - "loss": 0.6743, + "epoch": 0.35, + "grad_norm": 1.8836586484147022, + "learning_rate": 7.6011421073273904e-06, + "loss": 0.5807, "step": 4877 }, { - "epoch": 0.51, - "grad_norm": 2.838555185528894, - "learning_rate": 5.028119781668566e-06, - "loss": 0.6603, + "epoch": 0.35, + "grad_norm": 1.535184828037114, + "learning_rate": 7.600160618834864e-06, + "loss": 0.5706, "step": 4878 }, { - "epoch": 0.51, - "grad_norm": 1.8865667092436296, - "learning_rate": 5.026415568843943e-06, - "loss": 0.4771, + "epoch": 0.35, + "grad_norm": 1.647426060360714, + "learning_rate": 7.599178992993044e-06, + "loss": 0.5794, "step": 4879 }, { - "epoch": 0.51, - "grad_norm": 2.7027338628136923, - "learning_rate": 5.024711352950435e-06, - "loss": 0.6176, + "epoch": 0.35, + "grad_norm": 2.2132808145039955, + "learning_rate": 7.598197229853783e-06, + "loss": 0.5073, "step": 4880 }, { - "epoch": 0.51, - "grad_norm": 3.3089641577867184, - "learning_rate": 5.023007134186035e-06, - "loss": 0.5705, + "epoch": 0.35, + "grad_norm": 1.65059769814612, + "learning_rate": 7.5972153294689385e-06, + "loss": 0.5953, "step": 4881 }, { - "epoch": 0.51, - "grad_norm": 2.1851775810718976, - "learning_rate": 5.0213029127487315e-06, - "loss": 0.5628, + "epoch": 0.35, + "grad_norm": 0.7624652726855439, + "learning_rate": 7.596233291890382e-06, + "loss": 0.4645, "step": 4882 }, { - "epoch": 0.51, - "grad_norm": 3.5359776818740016, - "learning_rate": 5.0195986888365175e-06, - "loss": 0.6628, + "epoch": 0.35, + "grad_norm": 1.7687478706740556, + "learning_rate": 7.595251117169985e-06, + "loss": 0.5645, "step": 4883 }, { - "epoch": 0.51, - "grad_norm": 2.6067456420321014, - "learning_rate": 5.017894462647383e-06, - "loss": 0.7029, + "epoch": 0.35, + "grad_norm": 1.682188511344273, + "learning_rate": 7.59426880535963e-06, + "loss": 0.5578, "step": 4884 }, { - "epoch": 0.51, - "grad_norm": 2.532052200950741, - "learning_rate": 5.0161902343793245e-06, - "loss": 0.6756, + "epoch": 0.35, + "grad_norm": 1.4856730717691378, + "learning_rate": 7.593286356511205e-06, + "loss": 0.5835, "step": 4885 }, { - "epoch": 0.51, - "grad_norm": 2.4514973771189186, - "learning_rate": 5.014486004230329e-06, - "loss": 0.6228, + "epoch": 0.35, + "grad_norm": 1.9641547793085776, + "learning_rate": 7.592303770676609e-06, + "loss": 0.5525, "step": 4886 }, { - "epoch": 0.51, - "grad_norm": 2.9144203054617583, - "learning_rate": 5.012781772398392e-06, - "loss": 0.5954, + "epoch": 0.35, + "grad_norm": 2.031810068041524, + "learning_rate": 7.591321047907743e-06, + "loss": 0.6032, "step": 4887 }, { - "epoch": 0.51, - "grad_norm": 3.5304442748367006, - "learning_rate": 5.011077539081506e-06, - "loss": 0.6309, + "epoch": 0.35, + "grad_norm": 1.6395849597046783, + "learning_rate": 7.59033818825652e-06, + "loss": 0.5009, "step": 4888 }, { - "epoch": 0.51, - "grad_norm": 2.5115992891771706, - "learning_rate": 5.009373304477663e-06, - "loss": 0.65, + "epoch": 0.35, + "grad_norm": 1.6642475310934781, + "learning_rate": 7.589355191774854e-06, + "loss": 0.4877, "step": 4889 }, { - "epoch": 0.51, - "grad_norm": 6.842298359852735, - "learning_rate": 5.007669068784857e-06, - "loss": 0.5684, + "epoch": 0.35, + "grad_norm": 1.4860254513046747, + "learning_rate": 7.588372058514675e-06, + "loss": 0.563, "step": 4890 }, { - "epoch": 0.51, - "grad_norm": 2.293124571221643, - "learning_rate": 5.005964832201079e-06, - "loss": 0.6347, + "epoch": 0.35, + "grad_norm": 1.725087415236859, + "learning_rate": 7.587388788527913e-06, + "loss": 0.5354, "step": 4891 }, { - "epoch": 0.51, - "grad_norm": 12.289205214511725, - "learning_rate": 5.004260594924327e-06, - "loss": 0.649, + "epoch": 0.35, + "grad_norm": 0.7799895665880143, + "learning_rate": 7.586405381866507e-06, + "loss": 0.4377, "step": 4892 }, { - "epoch": 0.51, - "grad_norm": 2.6036414189539125, - "learning_rate": 5.002556357152589e-06, - "loss": 0.6041, + "epoch": 0.35, + "grad_norm": 1.4600866179856686, + "learning_rate": 7.5854218385824055e-06, + "loss": 0.5038, "step": 4893 }, { - "epoch": 0.51, - "grad_norm": 2.7202200557498992, - "learning_rate": 5.000852119083863e-06, - "loss": 0.5758, + "epoch": 0.35, + "grad_norm": 1.8172038741028984, + "learning_rate": 7.584438158727561e-06, + "loss": 0.6309, "step": 4894 }, { - "epoch": 0.52, - "grad_norm": 2.769388945590125, - "learning_rate": 4.999147880916139e-06, - "loss": 0.6113, + "epoch": 0.35, + "grad_norm": 1.9468267825997616, + "learning_rate": 7.583454342353938e-06, + "loss": 0.5058, "step": 4895 }, { - "epoch": 0.52, - "grad_norm": 2.9093269736434353, - "learning_rate": 4.997443642847412e-06, - "loss": 0.737, + "epoch": 0.35, + "grad_norm": 1.9933214355196305, + "learning_rate": 7.582470389513501e-06, + "loss": 0.545, "step": 4896 }, { - "epoch": 0.52, - "grad_norm": 2.6169699031806175, - "learning_rate": 4.995739405075674e-06, - "loss": 0.6174, + "epoch": 0.35, + "grad_norm": 1.6726220062117279, + "learning_rate": 7.581486300258227e-06, + "loss": 0.5493, "step": 4897 }, { - "epoch": 0.52, - "grad_norm": 4.102025420126629, - "learning_rate": 4.994035167798921e-06, - "loss": 0.601, + "epoch": 0.35, + "grad_norm": 1.674637176576178, + "learning_rate": 7.5805020746401e-06, + "loss": 0.4693, "step": 4898 }, { - "epoch": 0.52, - "grad_norm": 2.5096200600239045, - "learning_rate": 4.992330931215146e-06, - "loss": 0.7385, + "epoch": 0.35, + "grad_norm": 2.0604815428440504, + "learning_rate": 7.579517712711111e-06, + "loss": 0.5019, "step": 4899 }, { - "epoch": 0.52, - "grad_norm": 2.5045722346398853, - "learning_rate": 4.990626695522339e-06, - "loss": 0.7197, + "epoch": 0.35, + "grad_norm": 1.8944459134209932, + "learning_rate": 7.578533214523251e-06, + "loss": 0.5593, "step": 4900 }, { - "epoch": 0.52, - "grad_norm": 2.487306202546889, - "learning_rate": 4.988922460918496e-06, - "loss": 0.5743, + "epoch": 0.35, + "grad_norm": 1.4594232564135154, + "learning_rate": 7.577548580128534e-06, + "loss": 0.5017, "step": 4901 }, { - "epoch": 0.52, - "grad_norm": 5.319440294752822, - "learning_rate": 4.98721822760161e-06, - "loss": 0.7375, + "epoch": 0.35, + "grad_norm": 1.846656522730922, + "learning_rate": 7.576563809578965e-06, + "loss": 0.5247, "step": 4902 }, { - "epoch": 0.52, - "grad_norm": 2.1289416212364505, - "learning_rate": 4.985513995769672e-06, - "loss": 0.575, + "epoch": 0.35, + "grad_norm": 0.9090368230322707, + "learning_rate": 7.575578902926567e-06, + "loss": 0.4725, "step": 4903 }, { - "epoch": 0.52, - "grad_norm": 2.504486312284673, - "learning_rate": 4.983809765620678e-06, - "loss": 0.689, + "epoch": 0.35, + "grad_norm": 1.6622083830442977, + "learning_rate": 7.574593860223362e-06, + "loss": 0.5949, "step": 4904 }, { - "epoch": 0.52, - "grad_norm": 3.312174671775115, - "learning_rate": 4.9821055373526175e-06, - "loss": 0.5768, + "epoch": 0.35, + "grad_norm": 0.7848369419778249, + "learning_rate": 7.573608681521386e-06, + "loss": 0.4676, "step": 4905 }, { - "epoch": 0.52, - "grad_norm": 3.0889029546172004, - "learning_rate": 4.980401311163483e-06, - "loss": 0.6912, + "epoch": 0.35, + "grad_norm": 1.7295308827584182, + "learning_rate": 7.57262336687268e-06, + "loss": 0.5104, "step": 4906 }, { - "epoch": 0.52, - "grad_norm": 2.355632254622633, - "learning_rate": 4.978697087251269e-06, - "loss": 0.6775, + "epoch": 0.35, + "grad_norm": 1.5370046683006775, + "learning_rate": 7.571637916329289e-06, + "loss": 0.5968, "step": 4907 }, { - "epoch": 0.52, - "grad_norm": 2.2880860706153845, - "learning_rate": 4.976992865813968e-06, - "loss": 0.6585, + "epoch": 0.35, + "grad_norm": 1.704962983040745, + "learning_rate": 7.570652329943269e-06, + "loss": 0.5496, "step": 4908 }, { - "epoch": 0.52, - "grad_norm": 1.9846108300900807, - "learning_rate": 4.975288647049566e-06, - "loss": 0.5791, + "epoch": 0.35, + "grad_norm": 1.38820426607879, + "learning_rate": 7.569666607766684e-06, + "loss": 0.5321, "step": 4909 }, { - "epoch": 0.52, - "grad_norm": 2.2342087894997813, - "learning_rate": 4.9735844311560574e-06, - "loss": 0.6124, + "epoch": 0.35, + "grad_norm": 1.90162217220586, + "learning_rate": 7.5686807498516006e-06, + "loss": 0.5489, "step": 4910 }, { - "epoch": 0.52, - "grad_norm": 1.9947124356007078, - "learning_rate": 4.9718802183314345e-06, - "loss": 0.6461, + "epoch": 0.35, + "grad_norm": 1.7105389682281333, + "learning_rate": 7.567694756250095e-06, + "loss": 0.5323, "step": 4911 }, { - "epoch": 0.52, - "grad_norm": 2.197647296374431, - "learning_rate": 4.970176008773688e-06, - "loss": 0.6122, + "epoch": 0.35, + "grad_norm": 1.4758240559101432, + "learning_rate": 7.566708627014254e-06, + "loss": 0.5442, "step": 4912 }, { - "epoch": 0.52, - "grad_norm": 0.9932317458658896, - "learning_rate": 4.9684718026808035e-06, - "loss": 0.5758, + "epoch": 0.35, + "grad_norm": 1.5167299722045913, + "learning_rate": 7.565722362196166e-06, + "loss": 0.5494, "step": 4913 }, { - "epoch": 0.52, - "grad_norm": 2.709681927841805, - "learning_rate": 4.966767600250776e-06, - "loss": 0.6107, + "epoch": 0.35, + "grad_norm": 2.0077780917890005, + "learning_rate": 7.5647359618479275e-06, + "loss": 0.6149, "step": 4914 }, { - "epoch": 0.52, - "grad_norm": 2.1508024576423437, - "learning_rate": 4.965063401681591e-06, - "loss": 0.598, + "epoch": 0.35, + "grad_norm": 0.8352886069598454, + "learning_rate": 7.563749426021645e-06, + "loss": 0.4581, "step": 4915 }, { - "epoch": 0.52, - "grad_norm": 2.146135541337258, - "learning_rate": 4.963359207171239e-06, - "loss": 0.5641, + "epoch": 0.35, + "grad_norm": 1.5483076936575557, + "learning_rate": 7.562762754769433e-06, + "loss": 0.5901, "step": 4916 }, { - "epoch": 0.52, - "grad_norm": 3.3285165686253713, - "learning_rate": 4.961655016917712e-06, - "loss": 0.7023, + "epoch": 0.35, + "grad_norm": 2.11355869300127, + "learning_rate": 7.561775948143406e-06, + "loss": 0.5562, "step": 4917 }, { - "epoch": 0.52, - "grad_norm": 3.943768718695757, - "learning_rate": 4.959950831118991e-06, - "loss": 0.6485, + "epoch": 0.35, + "grad_norm": 1.8083120094276892, + "learning_rate": 7.560789006195695e-06, + "loss": 0.5763, "step": 4918 }, { - "epoch": 0.52, - "grad_norm": 4.023540049447175, - "learning_rate": 4.958246649973066e-06, - "loss": 0.6233, + "epoch": 0.35, + "grad_norm": 1.5514303673872076, + "learning_rate": 7.55980192897843e-06, + "loss": 0.5277, "step": 4919 }, { - "epoch": 0.52, - "grad_norm": 5.000434315098372, - "learning_rate": 4.956542473677926e-06, - "loss": 0.709, + "epoch": 0.35, + "grad_norm": 1.3924644734873377, + "learning_rate": 7.5588147165437545e-06, + "loss": 0.5172, "step": 4920 }, { - "epoch": 0.52, - "grad_norm": 2.067727072792758, - "learning_rate": 4.954838302431556e-06, - "loss": 0.5751, + "epoch": 0.35, + "grad_norm": 1.5291272719523268, + "learning_rate": 7.557827368943813e-06, + "loss": 0.5186, "step": 4921 }, { - "epoch": 0.52, - "grad_norm": 2.3191980917060753, - "learning_rate": 4.9531341364319404e-06, - "loss": 0.6596, + "epoch": 0.35, + "grad_norm": 0.8064945243578632, + "learning_rate": 7.556839886230764e-06, + "loss": 0.4471, "step": 4922 }, { - "epoch": 0.52, - "grad_norm": 2.3586105417389245, - "learning_rate": 4.951429975877066e-06, - "loss": 0.6919, + "epoch": 0.35, + "grad_norm": 1.4702617807130036, + "learning_rate": 7.5558522684567685e-06, + "loss": 0.5239, "step": 4923 }, { - "epoch": 0.52, - "grad_norm": 2.1526033976465793, - "learning_rate": 4.9497258209649165e-06, - "loss": 0.6568, - "step": 4924 + "epoch": 0.35, + "grad_norm": 1.7719036413781462, + "learning_rate": 7.5548645156739956e-06, + "loss": 0.5715, + "step": 4924 }, { - "epoch": 0.52, - "grad_norm": 2.133729675007697, - "learning_rate": 4.948021671893475e-06, - "loss": 0.6752, + "epoch": 0.35, + "grad_norm": 1.572933350254439, + "learning_rate": 7.553876627934622e-06, + "loss": 0.5482, "step": 4925 }, { - "epoch": 0.52, - "grad_norm": 3.159426842721098, - "learning_rate": 4.946317528860728e-06, - "loss": 0.7144, + "epoch": 0.35, + "grad_norm": 3.066280826583247, + "learning_rate": 7.5528886052908315e-06, + "loss": 0.5032, "step": 4926 }, { - "epoch": 0.52, - "grad_norm": 2.115435574837891, - "learning_rate": 4.9446133920646535e-06, - "loss": 0.5658, + "epoch": 0.35, + "grad_norm": 2.0483900044265235, + "learning_rate": 7.551900447794815e-06, + "loss": 0.5371, "step": 4927 }, { - "epoch": 0.52, - "grad_norm": 2.7390237750782194, - "learning_rate": 4.942909261703234e-06, - "loss": 0.606, + "epoch": 0.35, + "grad_norm": 1.743186877143913, + "learning_rate": 7.550912155498769e-06, + "loss": 0.5955, "step": 4928 }, { - "epoch": 0.52, - "grad_norm": 2.8886081183240573, - "learning_rate": 4.941205137974453e-06, - "loss": 0.6459, + "epoch": 0.35, + "grad_norm": 0.8129003946479268, + "learning_rate": 7.5499237284549e-06, + "loss": 0.4724, "step": 4929 }, { - "epoch": 0.52, - "grad_norm": 2.1116737366663343, - "learning_rate": 4.93950102107629e-06, - "loss": 0.5802, + "epoch": 0.35, + "grad_norm": 1.9423746022551112, + "learning_rate": 7.548935166715417e-06, + "loss": 0.5149, "step": 4930 }, { - "epoch": 0.52, - "grad_norm": 2.26047601506244, - "learning_rate": 4.937796911206722e-06, - "loss": 0.6255, + "epoch": 0.35, + "grad_norm": 0.6529491220368296, + "learning_rate": 7.547946470332545e-06, + "loss": 0.4728, "step": 4931 }, { - "epoch": 0.52, - "grad_norm": 2.1539115192013214, - "learning_rate": 4.936092808563729e-06, - "loss": 0.6115, + "epoch": 0.35, + "grad_norm": 1.3950009606297245, + "learning_rate": 7.546957639358505e-06, + "loss": 0.5495, "step": 4932 }, { - "epoch": 0.52, - "grad_norm": 3.0145846571869437, - "learning_rate": 4.9343887133452885e-06, - "loss": 0.6485, + "epoch": 0.35, + "grad_norm": 1.6374477805869674, + "learning_rate": 7.545968673845532e-06, + "loss": 0.5225, "step": 4933 }, { - "epoch": 0.52, - "grad_norm": 3.099479126502059, - "learning_rate": 4.932684625749379e-06, - "loss": 0.6624, + "epoch": 0.35, + "grad_norm": 1.7583654086060343, + "learning_rate": 7.5449795738458675e-06, + "loss": 0.6055, "step": 4934 }, { - "epoch": 0.52, - "grad_norm": 2.5769731100724083, - "learning_rate": 4.930980545973973e-06, - "loss": 0.6308, + "epoch": 0.35, + "grad_norm": 1.4772703776075, + "learning_rate": 7.543990339411759e-06, + "loss": 0.5756, "step": 4935 }, { - "epoch": 0.52, - "grad_norm": 2.6783179427641115, - "learning_rate": 4.929276474217049e-06, - "loss": 0.6243, + "epoch": 0.35, + "grad_norm": 1.6543873807501344, + "learning_rate": 7.543000970595459e-06, + "loss": 0.5479, "step": 4936 }, { - "epoch": 0.52, - "grad_norm": 2.943547559066303, - "learning_rate": 4.92757241067658e-06, - "loss": 0.6204, + "epoch": 0.35, + "grad_norm": 1.9606074945705314, + "learning_rate": 7.542011467449231e-06, + "loss": 0.5609, "step": 4937 }, { - "epoch": 0.52, - "grad_norm": 2.3266756319552204, - "learning_rate": 4.925868355550537e-06, - "loss": 0.5994, + "epoch": 0.35, + "grad_norm": 1.7274950137747096, + "learning_rate": 7.541021830025343e-06, + "loss": 0.5861, "step": 4938 }, { - "epoch": 0.52, - "grad_norm": 2.310739073972056, - "learning_rate": 4.924164309036897e-06, - "loss": 0.6684, + "epoch": 0.35, + "grad_norm": 1.702757553198122, + "learning_rate": 7.540032058376073e-06, + "loss": 0.5358, "step": 4939 }, { - "epoch": 0.52, - "grad_norm": 2.3395607545275916, - "learning_rate": 4.922460271333627e-06, - "loss": 0.5414, + "epoch": 0.35, + "grad_norm": 0.798079476117054, + "learning_rate": 7.5390421525537035e-06, + "loss": 0.468, "step": 4940 }, { - "epoch": 0.52, - "grad_norm": 2.4676331611691746, - "learning_rate": 4.920756242638698e-06, - "loss": 0.6746, + "epoch": 0.35, + "grad_norm": 1.521626491847901, + "learning_rate": 7.538052112610523e-06, + "loss": 0.5589, "step": 4941 }, { - "epoch": 0.52, - "grad_norm": 2.2941161951403686, - "learning_rate": 4.919052223150081e-06, - "loss": 0.7604, + "epoch": 0.35, + "grad_norm": 2.229442257055242, + "learning_rate": 7.537061938598831e-06, + "loss": 0.5784, "step": 4942 }, { - "epoch": 0.52, - "grad_norm": 2.929369540693781, - "learning_rate": 4.917348213065742e-06, - "loss": 0.6284, + "epoch": 0.35, + "grad_norm": 1.6607005478757135, + "learning_rate": 7.536071630570929e-06, + "loss": 0.5224, "step": 4943 }, { - "epoch": 0.52, - "grad_norm": 2.196953658021791, - "learning_rate": 4.915644212583649e-06, - "loss": 0.5309, + "epoch": 0.35, + "grad_norm": 1.5217738803930063, + "learning_rate": 7.535081188579132e-06, + "loss": 0.6062, "step": 4944 }, { - "epoch": 0.52, - "grad_norm": 2.277862980217032, - "learning_rate": 4.913940221901766e-06, - "loss": 0.6333, + "epoch": 0.35, + "grad_norm": 1.5800854042212251, + "learning_rate": 7.534090612675755e-06, + "loss": 0.5587, "step": 4945 }, { - "epoch": 0.52, - "grad_norm": 2.1353131160650287, - "learning_rate": 4.912236241218061e-06, - "loss": 0.6933, + "epoch": 0.35, + "grad_norm": 1.4470038148668924, + "learning_rate": 7.533099902913126e-06, + "loss": 0.4779, "step": 4946 }, { - "epoch": 0.52, - "grad_norm": 2.1309552775913865, - "learning_rate": 4.910532270730497e-06, - "loss": 0.6451, + "epoch": 0.35, + "grad_norm": 1.741351604043712, + "learning_rate": 7.532109059343575e-06, + "loss": 0.5996, "step": 4947 }, { - "epoch": 0.52, - "grad_norm": 2.226377878125629, - "learning_rate": 4.908828310637031e-06, - "loss": 0.6428, + "epoch": 0.35, + "grad_norm": 1.7508441489640851, + "learning_rate": 7.531118082019446e-06, + "loss": 0.5238, "step": 4948 }, { - "epoch": 0.52, - "grad_norm": 2.218293921242868, - "learning_rate": 4.907124361135632e-06, - "loss": 0.6537, + "epoch": 0.35, + "grad_norm": 1.4991082480471094, + "learning_rate": 7.530126970993082e-06, + "loss": 0.5072, "step": 4949 }, { - "epoch": 0.52, - "grad_norm": 2.4059507731751615, - "learning_rate": 4.905420422424254e-06, - "loss": 0.5964, + "epoch": 0.35, + "grad_norm": 1.6731013834020672, + "learning_rate": 7.529135726316839e-06, + "loss": 0.5574, "step": 4950 }, { - "epoch": 0.52, - "grad_norm": 2.4481730880265014, - "learning_rate": 4.903716494700859e-06, - "loss": 0.6094, + "epoch": 0.35, + "grad_norm": 1.6476992505689012, + "learning_rate": 7.528144348043077e-06, + "loss": 0.5442, "step": 4951 }, { - "epoch": 0.52, - "grad_norm": 2.684768787067311, - "learning_rate": 4.902012578163404e-06, - "loss": 0.6083, + "epoch": 0.35, + "grad_norm": 1.5472203379236447, + "learning_rate": 7.527152836224163e-06, + "loss": 0.5864, "step": 4952 }, { - "epoch": 0.52, - "grad_norm": 1.9937131948780513, - "learning_rate": 4.900308673009843e-06, - "loss": 0.5961, + "epoch": 0.35, + "grad_norm": 1.5996551487830517, + "learning_rate": 7.526161190912473e-06, + "loss": 0.5482, "step": 4953 }, { - "epoch": 0.52, - "grad_norm": 3.274862105439873, - "learning_rate": 4.8986047794381325e-06, - "loss": 0.6355, + "epoch": 0.35, + "grad_norm": 1.483793761845596, + "learning_rate": 7.525169412160389e-06, + "loss": 0.5692, "step": 4954 }, { - "epoch": 0.52, - "grad_norm": 2.081137560086168, - "learning_rate": 4.896900897646226e-06, - "loss": 0.6181, + "epoch": 0.35, + "grad_norm": 1.5138857315791683, + "learning_rate": 7.524177500020302e-06, + "loss": 0.5507, "step": 4955 }, { - "epoch": 0.52, - "grad_norm": 2.1349551752747886, - "learning_rate": 4.8951970278320765e-06, - "loss": 0.6046, + "epoch": 0.35, + "grad_norm": 1.7583709572096526, + "learning_rate": 7.523185454544606e-06, + "loss": 0.5549, "step": 4956 }, { - "epoch": 0.52, - "grad_norm": 2.541248755489936, - "learning_rate": 4.89349317019363e-06, - "loss": 0.6012, + "epoch": 0.35, + "grad_norm": 1.5472325487721006, + "learning_rate": 7.522193275785703e-06, + "loss": 0.4801, "step": 4957 }, { - "epoch": 0.52, - "grad_norm": 2.565627078399468, - "learning_rate": 4.891789324928842e-06, - "loss": 0.6558, + "epoch": 0.35, + "grad_norm": 1.5678622229374912, + "learning_rate": 7.521200963796006e-06, + "loss": 0.6104, "step": 4958 }, { - "epoch": 0.52, - "grad_norm": 3.109453742078581, - "learning_rate": 4.890085492235657e-06, - "loss": 0.5778, + "epoch": 0.35, + "grad_norm": 1.738830652847831, + "learning_rate": 7.52020851862793e-06, + "loss": 0.5712, "step": 4959 }, { - "epoch": 0.52, - "grad_norm": 2.0050613585910755, - "learning_rate": 4.888381672312022e-06, - "loss": 0.6251, + "epoch": 0.35, + "grad_norm": 1.8820902991770343, + "learning_rate": 7.519215940333902e-06, + "loss": 0.5687, "step": 4960 }, { - "epoch": 0.52, - "grad_norm": 2.339550540816934, - "learning_rate": 4.88667786535588e-06, - "loss": 0.6687, + "epoch": 0.35, + "grad_norm": 1.520298011283955, + "learning_rate": 7.5182232289663505e-06, + "loss": 0.5066, "step": 4961 }, { - "epoch": 0.52, - "grad_norm": 2.290758913793059, - "learning_rate": 4.884974071565177e-06, - "loss": 0.6846, + "epoch": 0.35, + "grad_norm": 1.7188046530800938, + "learning_rate": 7.517230384577716e-06, + "loss": 0.5708, "step": 4962 }, { - "epoch": 0.52, - "grad_norm": 1.077911583636364, - "learning_rate": 4.883270291137852e-06, - "loss": 0.6473, + "epoch": 0.35, + "grad_norm": 1.668706866732145, + "learning_rate": 7.516237407220444e-06, + "loss": 0.5579, "step": 4963 }, { - "epoch": 0.52, - "grad_norm": 1.985960627648404, - "learning_rate": 4.88156652427185e-06, - "loss": 0.6044, + "epoch": 0.35, + "grad_norm": 1.5766489134631672, + "learning_rate": 7.515244296946984e-06, + "loss": 0.5406, "step": 4964 }, { - "epoch": 0.52, - "grad_norm": 2.305367885235591, - "learning_rate": 4.879862771165107e-06, - "loss": 0.5774, + "epoch": 0.35, + "grad_norm": 1.624373570102741, + "learning_rate": 7.514251053809798e-06, + "loss": 0.5542, "step": 4965 }, { - "epoch": 0.52, - "grad_norm": 2.4768244502117036, - "learning_rate": 4.878159032015559e-06, - "loss": 0.6634, + "epoch": 0.35, + "grad_norm": 1.558097936173771, + "learning_rate": 7.51325767786135e-06, + "loss": 0.5571, "step": 4966 }, { - "epoch": 0.52, - "grad_norm": 2.4276873504006815, - "learning_rate": 4.8764553070211415e-06, - "loss": 0.6828, + "epoch": 0.35, + "grad_norm": 1.4657423201381425, + "learning_rate": 7.5122641691541185e-06, + "loss": 0.5195, "step": 4967 }, { - "epoch": 0.52, - "grad_norm": 2.4060474857332013, - "learning_rate": 4.87475159637979e-06, - "loss": 0.6065, + "epoch": 0.35, + "grad_norm": 3.0556826498403193, + "learning_rate": 7.5112705277405794e-06, + "loss": 0.5199, "step": 4968 }, { - "epoch": 0.52, - "grad_norm": 1.7772890860805246, - "learning_rate": 4.873047900289437e-06, - "loss": 0.6808, + "epoch": 0.35, + "grad_norm": 1.4906878926598979, + "learning_rate": 7.5102767536732215e-06, + "loss": 0.5452, "step": 4969 }, { - "epoch": 0.52, - "grad_norm": 3.0625237060008588, - "learning_rate": 4.87134421894801e-06, - "loss": 0.6523, + "epoch": 0.35, + "grad_norm": 0.8167054108534888, + "learning_rate": 7.50928284700454e-06, + "loss": 0.4928, "step": 4970 }, { - "epoch": 0.52, - "grad_norm": 2.2524142418639466, - "learning_rate": 4.869640552553441e-06, - "loss": 0.6733, + "epoch": 0.35, + "grad_norm": 0.7597169214935108, + "learning_rate": 7.508288807787036e-06, + "loss": 0.4666, "step": 4971 }, { - "epoch": 0.52, - "grad_norm": 2.2414743848382006, - "learning_rate": 4.867936901303656e-06, - "loss": 0.6021, + "epoch": 0.35, + "grad_norm": 2.470837969840525, + "learning_rate": 7.507294636073219e-06, + "loss": 0.5384, "step": 4972 }, { - "epoch": 0.52, - "grad_norm": 2.338392581877551, - "learning_rate": 4.866233265396577e-06, - "loss": 0.7099, + "epoch": 0.35, + "grad_norm": 2.000337085821309, + "learning_rate": 7.506300331915601e-06, + "loss": 0.602, "step": 4973 }, { - "epoch": 0.52, - "grad_norm": 2.0935049485955495, - "learning_rate": 4.8645296450301345e-06, - "loss": 0.6684, + "epoch": 0.35, + "grad_norm": 1.851030355092836, + "learning_rate": 7.505305895366708e-06, + "loss": 0.5543, "step": 4974 }, { - "epoch": 0.52, - "grad_norm": 2.1007971339772427, - "learning_rate": 4.862826040402246e-06, - "loss": 0.6992, + "epoch": 0.35, + "grad_norm": 1.5261731235854827, + "learning_rate": 7.5043113264790675e-06, + "loss": 0.5547, "step": 4975 }, { - "epoch": 0.52, - "grad_norm": 0.9696344105471323, - "learning_rate": 4.861122451710829e-06, - "loss": 0.5749, + "epoch": 0.35, + "grad_norm": 1.7647624109233802, + "learning_rate": 7.503316625305219e-06, + "loss": 0.5402, "step": 4976 }, { - "epoch": 0.52, - "grad_norm": 2.553225906237905, - "learning_rate": 4.859418879153805e-06, - "loss": 0.7279, + "epoch": 0.35, + "grad_norm": 1.6038540336943767, + "learning_rate": 7.502321791897704e-06, + "loss": 0.4724, "step": 4977 }, { - "epoch": 0.52, - "grad_norm": 2.067442914319545, - "learning_rate": 4.857715322929091e-06, - "loss": 0.6576, + "epoch": 0.35, + "grad_norm": 1.5973719668418904, + "learning_rate": 7.501326826309072e-06, + "loss": 0.5271, "step": 4978 }, { - "epoch": 0.52, - "grad_norm": 2.149325406618078, - "learning_rate": 4.856011783234599e-06, - "loss": 0.7101, + "epoch": 0.35, + "grad_norm": 1.8515615049882899, + "learning_rate": 7.500331728591881e-06, + "loss": 0.58, "step": 4979 }, { - "epoch": 0.52, - "grad_norm": 1.9706548407978104, - "learning_rate": 4.85430826026824e-06, - "loss": 0.6659, + "epoch": 0.35, + "grad_norm": 1.5997308167338569, + "learning_rate": 7.499336498798696e-06, + "loss": 0.5913, "step": 4980 }, { - "epoch": 0.52, - "grad_norm": 2.6696818093641395, - "learning_rate": 4.852604754227927e-06, - "loss": 0.6047, + "epoch": 0.35, + "grad_norm": 2.969080838798194, + "learning_rate": 7.498341136982088e-06, + "loss": 0.4946, "step": 4981 }, { - "epoch": 0.52, - "grad_norm": 3.3802463897178785, - "learning_rate": 4.8509012653115695e-06, - "loss": 0.7413, + "epoch": 0.35, + "grad_norm": 1.57953840276802, + "learning_rate": 7.497345643194637e-06, + "loss": 0.6232, "step": 4982 }, { - "epoch": 0.52, - "grad_norm": 2.1818945622881833, - "learning_rate": 4.849197793717069e-06, - "loss": 0.5983, + "epoch": 0.35, + "grad_norm": 1.9067978957706715, + "learning_rate": 7.496350017488926e-06, + "loss": 0.6135, "step": 4983 }, { - "epoch": 0.52, - "grad_norm": 2.14901122734723, - "learning_rate": 4.847494339642334e-06, - "loss": 0.7177, + "epoch": 0.35, + "grad_norm": 1.9838491387632888, + "learning_rate": 7.495354259917549e-06, + "loss": 0.5722, "step": 4984 }, { - "epoch": 0.52, - "grad_norm": 2.918569152656593, - "learning_rate": 4.8457909032852654e-06, - "loss": 0.6908, + "epoch": 0.35, + "grad_norm": 2.164894351400883, + "learning_rate": 7.494358370533103e-06, + "loss": 0.619, "step": 4985 }, { - "epoch": 0.52, - "grad_norm": 0.9406184537970192, - "learning_rate": 4.844087484843764e-06, - "loss": 0.6133, + "epoch": 0.35, + "grad_norm": 1.4831946622568706, + "learning_rate": 7.493362349388197e-06, + "loss": 0.515, "step": 4986 }, { - "epoch": 0.52, - "grad_norm": 2.0469495720368256, - "learning_rate": 4.84238408451573e-06, - "loss": 0.6769, + "epoch": 0.35, + "grad_norm": 1.6261291645636096, + "learning_rate": 7.492366196535444e-06, + "loss": 0.5556, "step": 4987 }, { - "epoch": 0.52, - "grad_norm": 2.7726823392613116, - "learning_rate": 4.840680702499056e-06, - "loss": 0.6546, + "epoch": 0.35, + "grad_norm": 1.7195571400701262, + "learning_rate": 7.491369912027461e-06, + "loss": 0.5558, "step": 4988 }, { - "epoch": 0.52, - "grad_norm": 2.272990604413833, - "learning_rate": 4.838977338991636e-06, - "loss": 0.647, + "epoch": 0.35, + "grad_norm": 1.3308378816201414, + "learning_rate": 7.490373495916877e-06, + "loss": 0.5835, "step": 4989 }, { - "epoch": 0.53, - "grad_norm": 2.9330631724535494, - "learning_rate": 4.837273994191364e-06, - "loss": 0.6251, + "epoch": 0.35, + "grad_norm": 2.3276485125290676, + "learning_rate": 7.489376948256329e-06, + "loss": 0.5226, "step": 4990 }, { - "epoch": 0.53, - "grad_norm": 2.2631226425982933, - "learning_rate": 4.835570668296131e-06, - "loss": 0.676, + "epoch": 0.35, + "grad_norm": 0.803752856707921, + "learning_rate": 7.4883802690984545e-06, + "loss": 0.4386, "step": 4991 }, { - "epoch": 0.53, - "grad_norm": 2.13674093049108, - "learning_rate": 4.83386736150382e-06, - "loss": 0.6693, + "epoch": 0.35, + "grad_norm": 1.4030465915722392, + "learning_rate": 7.4873834584959e-06, + "loss": 0.4931, "step": 4992 }, { - "epoch": 0.53, - "grad_norm": 2.133284920671136, - "learning_rate": 4.83216407401232e-06, - "loss": 0.5922, + "epoch": 0.35, + "grad_norm": 2.1789682929891505, + "learning_rate": 7.486386516501326e-06, + "loss": 0.5645, "step": 4993 }, { - "epoch": 0.53, - "grad_norm": 2.520394477013768, - "learning_rate": 4.830460806019512e-06, - "loss": 0.6195, + "epoch": 0.35, + "grad_norm": 1.5364438425027147, + "learning_rate": 7.48538944316739e-06, + "loss": 0.5207, "step": 4994 }, { - "epoch": 0.53, - "grad_norm": 2.693745272656559, - "learning_rate": 4.828757557723279e-06, - "loss": 0.6932, + "epoch": 0.35, + "grad_norm": 0.7765283163650484, + "learning_rate": 7.484392238546761e-06, + "loss": 0.4441, "step": 4995 }, { - "epoch": 0.53, - "grad_norm": 2.4450291798500845, - "learning_rate": 4.827054329321496e-06, - "loss": 0.537, + "epoch": 0.35, + "grad_norm": 1.85902299831688, + "learning_rate": 7.483394902692117e-06, + "loss": 0.578, "step": 4996 }, { - "epoch": 0.53, - "grad_norm": 2.0458948423518097, - "learning_rate": 4.825351121012042e-06, - "loss": 0.6444, + "epoch": 0.35, + "grad_norm": 1.703086643345389, + "learning_rate": 7.482397435656139e-06, + "loss": 0.5661, "step": 4997 }, { - "epoch": 0.53, - "grad_norm": 2.6712241240805357, - "learning_rate": 4.823647932992788e-06, - "loss": 0.6205, + "epoch": 0.35, + "grad_norm": 1.7654416810941216, + "learning_rate": 7.481399837491517e-06, + "loss": 0.5658, "step": 4998 }, { - "epoch": 0.53, - "grad_norm": 2.207121252476882, - "learning_rate": 4.82194476546161e-06, - "loss": 0.5541, + "epoch": 0.35, + "grad_norm": 1.5767385890224384, + "learning_rate": 7.480402108250948e-06, + "loss": 0.5807, "step": 4999 }, { - "epoch": 0.53, - "grad_norm": 2.289069632699829, - "learning_rate": 4.820241618616375e-06, - "loss": 0.544, + "epoch": 0.35, + "grad_norm": 1.7257442171893418, + "learning_rate": 7.4794042479871345e-06, + "loss": 0.6021, "step": 5000 }, { - "epoch": 0.53, - "grad_norm": 2.2675898245567687, - "learning_rate": 4.818538492654947e-06, - "loss": 0.6852, + "epoch": 0.35, + "grad_norm": 1.470674433630983, + "learning_rate": 7.478406256752786e-06, + "loss": 0.5992, "step": 5001 }, { - "epoch": 0.53, - "grad_norm": 2.343735840653348, - "learning_rate": 4.816835387775193e-06, - "loss": 0.6876, + "epoch": 0.35, + "grad_norm": 2.085562794425816, + "learning_rate": 7.477408134600623e-06, + "loss": 0.5981, "step": 5002 }, { - "epoch": 0.53, - "grad_norm": 2.246807379369579, - "learning_rate": 4.8151323041749734e-06, - "loss": 0.6657, + "epoch": 0.36, + "grad_norm": 2.3001738224890094, + "learning_rate": 7.476409881583367e-06, + "loss": 0.5261, "step": 5003 }, { - "epoch": 0.53, - "grad_norm": 2.4237118515960447, - "learning_rate": 4.8134292420521505e-06, - "loss": 0.6224, + "epoch": 0.36, + "grad_norm": 1.7480149260977138, + "learning_rate": 7.4754114977537485e-06, + "loss": 0.524, "step": 5004 }, { - "epoch": 0.53, - "grad_norm": 2.289453411888955, - "learning_rate": 4.811726201604576e-06, - "loss": 0.655, + "epoch": 0.36, + "grad_norm": 1.6453381850079216, + "learning_rate": 7.474412983164508e-06, + "loss": 0.5576, "step": 5005 }, { - "epoch": 0.53, - "grad_norm": 2.0026507977297503, - "learning_rate": 4.810023183030109e-06, - "loss": 0.6169, + "epoch": 0.36, + "grad_norm": 1.5752315524718463, + "learning_rate": 7.47341433786839e-06, + "loss": 0.6071, "step": 5006 }, { - "epoch": 0.53, - "grad_norm": 3.0079499431943195, - "learning_rate": 4.808320186526599e-06, - "loss": 0.6452, + "epoch": 0.36, + "grad_norm": 1.7821339909280394, + "learning_rate": 7.4724155619181446e-06, + "loss": 0.6018, "step": 5007 }, { - "epoch": 0.53, - "grad_norm": 2.2628812402524896, - "learning_rate": 4.806617212291898e-06, - "loss": 0.6522, + "epoch": 0.36, + "grad_norm": 1.7951400187128286, + "learning_rate": 7.4714166553665324e-06, + "loss": 0.6187, "step": 5008 }, { - "epoch": 0.53, - "grad_norm": 2.2505910057014997, - "learning_rate": 4.804914260523847e-06, - "loss": 0.5728, + "epoch": 0.36, + "grad_norm": 2.4237563030985143, + "learning_rate": 7.470417618266317e-06, + "loss": 0.6096, "step": 5009 }, { - "epoch": 0.53, - "grad_norm": 2.2225142986972495, - "learning_rate": 4.803211331420294e-06, - "loss": 0.636, + "epoch": 0.36, + "grad_norm": 1.5264341324406885, + "learning_rate": 7.469418450670273e-06, + "loss": 0.5515, "step": 5010 }, { - "epoch": 0.53, - "grad_norm": 2.5157421199340506, - "learning_rate": 4.801508425179079e-06, - "loss": 0.6838, + "epoch": 0.36, + "grad_norm": 1.7397670980544666, + "learning_rate": 7.468419152631178e-06, + "loss": 0.5732, "step": 5011 }, { - "epoch": 0.53, - "grad_norm": 3.362812838563486, - "learning_rate": 4.799805541998042e-06, - "loss": 0.6514, + "epoch": 0.36, + "grad_norm": 1.4520366138100707, + "learning_rate": 7.467419724201821e-06, + "loss": 0.5992, "step": 5012 }, { - "epoch": 0.53, - "grad_norm": 2.460937036790075, - "learning_rate": 4.79810268207502e-06, - "loss": 0.63, + "epoch": 0.36, + "grad_norm": 1.8042731704910568, + "learning_rate": 7.466420165434991e-06, + "loss": 0.5603, "step": 5013 }, { - "epoch": 0.53, - "grad_norm": 3.3836688933373873, - "learning_rate": 4.796399845607844e-06, - "loss": 0.6927, + "epoch": 0.36, + "grad_norm": 1.4563376794033969, + "learning_rate": 7.465420476383492e-06, + "loss": 0.4738, "step": 5014 }, { - "epoch": 0.53, - "grad_norm": 2.1163838752069926, - "learning_rate": 4.7946970327943435e-06, - "loss": 0.6223, + "epoch": 0.36, + "grad_norm": 1.8855333052266465, + "learning_rate": 7.4644206571001285e-06, + "loss": 0.5346, "step": 5015 }, { - "epoch": 0.53, - "grad_norm": 2.4368026381336314, - "learning_rate": 4.79299424383235e-06, - "loss": 0.5904, + "epoch": 0.36, + "grad_norm": 1.630831424369212, + "learning_rate": 7.463420707637718e-06, + "loss": 0.584, "step": 5016 }, { - "epoch": 0.53, - "grad_norm": 2.0227052397175225, - "learning_rate": 4.791291478919688e-06, - "loss": 0.6301, + "epoch": 0.36, + "grad_norm": 1.8066907695212817, + "learning_rate": 7.462420628049075e-06, + "loss": 0.5524, "step": 5017 }, { - "epoch": 0.53, - "grad_norm": 3.4149521326579464, - "learning_rate": 4.789588738254176e-06, - "loss": 0.6325, + "epoch": 0.36, + "grad_norm": 1.8579376176384792, + "learning_rate": 7.461420418387032e-06, + "loss": 0.5949, "step": 5018 }, { - "epoch": 0.53, - "grad_norm": 2.03492009399757, - "learning_rate": 4.787886022033637e-06, - "loss": 0.6345, + "epoch": 0.36, + "grad_norm": 1.6150997345575844, + "learning_rate": 7.460420078704422e-06, + "loss": 0.5222, "step": 5019 }, { - "epoch": 0.53, - "grad_norm": 2.045894564408748, - "learning_rate": 4.786183330455886e-06, - "loss": 0.6108, + "epoch": 0.36, + "grad_norm": 1.621533932985966, + "learning_rate": 7.459419609054086e-06, + "loss": 0.4616, "step": 5020 }, { - "epoch": 0.53, - "grad_norm": 2.0734976435360384, - "learning_rate": 4.784480663718742e-06, - "loss": 0.5606, + "epoch": 0.36, + "grad_norm": 1.634698922668146, + "learning_rate": 7.458419009488873e-06, + "loss": 0.5806, "step": 5021 }, { - "epoch": 0.53, - "grad_norm": 4.08406849984304, - "learning_rate": 4.782778022020006e-06, - "loss": 0.6312, + "epoch": 0.36, + "grad_norm": 1.5735826515374225, + "learning_rate": 7.457418280061638e-06, + "loss": 0.5472, "step": 5022 }, { - "epoch": 0.53, - "grad_norm": 2.8678914982078667, - "learning_rate": 4.7810754055574945e-06, - "loss": 0.6431, + "epoch": 0.36, + "grad_norm": 2.3356321576567396, + "learning_rate": 7.456417420825243e-06, + "loss": 0.5784, "step": 5023 }, { - "epoch": 0.53, - "grad_norm": 2.010459684826099, - "learning_rate": 4.779372814529008e-06, - "loss": 0.6737, + "epoch": 0.36, + "grad_norm": 1.636215566831808, + "learning_rate": 7.455416431832556e-06, + "loss": 0.5013, "step": 5024 }, { - "epoch": 0.53, - "grad_norm": 2.274385411512584, - "learning_rate": 4.7776702491323506e-06, - "loss": 0.6473, + "epoch": 0.36, + "grad_norm": 1.8152254355469455, + "learning_rate": 7.454415313136451e-06, + "loss": 0.55, "step": 5025 }, { - "epoch": 0.53, - "grad_norm": 2.4893768635552216, - "learning_rate": 4.775967709565323e-06, - "loss": 0.5868, + "epoch": 0.36, + "grad_norm": 1.9195200821397498, + "learning_rate": 7.453414064789815e-06, + "loss": 0.5575, "step": 5026 }, { - "epoch": 0.53, - "grad_norm": 2.8594743184322855, - "learning_rate": 4.774265196025716e-06, - "loss": 0.6205, + "epoch": 0.36, + "grad_norm": 1.7419063276022502, + "learning_rate": 7.452412686845533e-06, + "loss": 0.5226, "step": 5027 }, { - "epoch": 0.53, - "grad_norm": 3.0896156585418804, - "learning_rate": 4.772562708711328e-06, - "loss": 0.5436, + "epoch": 0.36, + "grad_norm": 1.4713389483001647, + "learning_rate": 7.451411179356505e-06, + "loss": 0.471, "step": 5028 }, { - "epoch": 0.53, - "grad_norm": 2.1024636760857023, - "learning_rate": 4.770860247819946e-06, - "loss": 0.6145, + "epoch": 0.36, + "grad_norm": 0.7792035267372587, + "learning_rate": 7.450409542375632e-06, + "loss": 0.438, "step": 5029 }, { - "epoch": 0.53, - "grad_norm": 2.7483433705268467, - "learning_rate": 4.7691578135493595e-06, - "loss": 0.6789, + "epoch": 0.36, + "grad_norm": 2.08646622354803, + "learning_rate": 7.449407775955822e-06, + "loss": 0.5495, "step": 5030 }, { - "epoch": 0.53, - "grad_norm": 3.7267937561387456, - "learning_rate": 4.76745540609735e-06, - "loss": 0.651, + "epoch": 0.36, + "grad_norm": 1.5105509580413656, + "learning_rate": 7.448405880149995e-06, + "loss": 0.6264, "step": 5031 }, { - "epoch": 0.53, - "grad_norm": 0.930921651098154, - "learning_rate": 4.765753025661699e-06, - "loss": 0.5867, + "epoch": 0.36, + "grad_norm": 1.8098579051965586, + "learning_rate": 7.447403855011072e-06, + "loss": 0.6302, "step": 5032 }, { - "epoch": 0.53, - "grad_norm": 2.0301984146112435, - "learning_rate": 4.764050672440184e-06, - "loss": 0.633, + "epoch": 0.36, + "grad_norm": 1.8119766172667833, + "learning_rate": 7.446401700591983e-06, + "loss": 0.5902, "step": 5033 }, { - "epoch": 0.53, - "grad_norm": 3.3826775767799564, - "learning_rate": 4.76234834663058e-06, - "loss": 0.6399, + "epoch": 0.36, + "grad_norm": 0.7056572005983928, + "learning_rate": 7.4453994169456686e-06, + "loss": 0.4254, "step": 5034 }, { - "epoch": 0.53, - "grad_norm": 2.648287363125826, - "learning_rate": 4.76064604843066e-06, - "loss": 0.6734, + "epoch": 0.36, + "grad_norm": 1.6681456314143854, + "learning_rate": 7.444397004125071e-06, + "loss": 0.6201, "step": 5035 }, { - "epoch": 0.53, - "grad_norm": 2.6257884700520076, - "learning_rate": 4.758943778038189e-06, - "loss": 0.5742, + "epoch": 0.36, + "grad_norm": 1.5649564758407886, + "learning_rate": 7.4433944621831414e-06, + "loss": 0.5959, "step": 5036 }, { - "epoch": 0.53, - "grad_norm": 2.790385480712019, - "learning_rate": 4.757241535650931e-06, - "loss": 0.5854, + "epoch": 0.36, + "grad_norm": 1.9541138939854987, + "learning_rate": 7.442391791172836e-06, + "loss": 0.5296, "step": 5037 }, { - "epoch": 0.53, - "grad_norm": 2.3321216810115755, - "learning_rate": 4.755539321466652e-06, - "loss": 0.7305, + "epoch": 0.36, + "grad_norm": 1.5801222628015292, + "learning_rate": 7.44138899114712e-06, + "loss": 0.5183, "step": 5038 }, { - "epoch": 0.53, - "grad_norm": 2.470106783507069, - "learning_rate": 4.753837135683108e-06, - "loss": 0.7199, + "epoch": 0.36, + "grad_norm": 1.4941390415153633, + "learning_rate": 7.4403860621589665e-06, + "loss": 0.5587, "step": 5039 }, { - "epoch": 0.53, - "grad_norm": 2.519684963476052, - "learning_rate": 4.752134978498052e-06, - "loss": 0.6208, + "epoch": 0.36, + "grad_norm": 1.4681971076706204, + "learning_rate": 7.43938300426135e-06, + "loss": 0.5083, "step": 5040 }, { - "epoch": 0.53, - "grad_norm": 1.9933322283576163, - "learning_rate": 4.750432850109239e-06, - "loss": 0.5654, + "epoch": 0.36, + "grad_norm": 0.7013568903544598, + "learning_rate": 7.438379817507257e-06, + "loss": 0.4511, "step": 5041 }, { - "epoch": 0.53, - "grad_norm": 2.1628910176613396, - "learning_rate": 4.748730750714417e-06, - "loss": 0.5597, + "epoch": 0.36, + "grad_norm": 1.5783403908304015, + "learning_rate": 7.437376501949683e-06, + "loss": 0.5132, "step": 5042 }, { - "epoch": 0.53, - "grad_norm": 3.2601189066847938, - "learning_rate": 4.74702868051133e-06, - "loss": 0.5868, + "epoch": 0.36, + "grad_norm": 1.5612692797430152, + "learning_rate": 7.43637305764162e-06, + "loss": 0.5409, "step": 5043 }, { - "epoch": 0.53, - "grad_norm": 2.4445761523300917, - "learning_rate": 4.745326639697718e-06, - "loss": 0.6006, + "epoch": 0.36, + "grad_norm": 1.6476981173971175, + "learning_rate": 7.435369484636079e-06, + "loss": 0.56, "step": 5044 }, { - "epoch": 0.53, - "grad_norm": 2.157421144865131, - "learning_rate": 4.743624628471322e-06, - "loss": 0.5829, + "epoch": 0.36, + "grad_norm": 1.4983924004016416, + "learning_rate": 7.434365782986068e-06, + "loss": 0.5096, "step": 5045 }, { - "epoch": 0.53, - "grad_norm": 10.608165987972704, - "learning_rate": 4.741922647029873e-06, - "loss": 0.5994, + "epoch": 0.36, + "grad_norm": 1.7208222242592959, + "learning_rate": 7.433361952744609e-06, + "loss": 0.5943, "step": 5046 }, { - "epoch": 0.53, - "grad_norm": 2.6959202608616892, - "learning_rate": 4.740220695571108e-06, - "loss": 0.7195, + "epoch": 0.36, + "grad_norm": 0.8198055297706212, + "learning_rate": 7.432357993964724e-06, + "loss": 0.4455, "step": 5047 }, { - "epoch": 0.53, - "grad_norm": 2.374505085575942, - "learning_rate": 4.738518774292752e-06, - "loss": 0.5836, + "epoch": 0.36, + "grad_norm": 1.556242319986356, + "learning_rate": 7.43135390669945e-06, + "loss": 0.5933, "step": 5048 }, { - "epoch": 0.53, - "grad_norm": 2.4217957922423077, - "learning_rate": 4.736816883392527e-06, - "loss": 0.5942, + "epoch": 0.36, + "grad_norm": 1.3736043359517, + "learning_rate": 7.430349691001823e-06, + "loss": 0.5102, "step": 5049 }, { - "epoch": 0.53, - "grad_norm": 3.6160178947490085, - "learning_rate": 4.735115023068155e-06, - "loss": 0.6114, + "epoch": 0.36, + "grad_norm": 1.7877012383737436, + "learning_rate": 7.42934534692489e-06, + "loss": 0.5718, "step": 5050 }, { - "epoch": 0.53, - "grad_norm": 1.9430364282301278, - "learning_rate": 4.733413193517355e-06, - "loss": 0.6125, + "epoch": 0.36, + "grad_norm": 1.9208428585245887, + "learning_rate": 7.428340874521705e-06, + "loss": 0.593, "step": 5051 }, { - "epoch": 0.53, - "grad_norm": 2.886628277780551, - "learning_rate": 4.731711394937842e-06, - "loss": 0.6733, + "epoch": 0.36, + "grad_norm": 1.5911293365793935, + "learning_rate": 7.427336273845325e-06, + "loss": 0.5671, "step": 5052 }, { - "epoch": 0.53, - "grad_norm": 2.7819669075459643, - "learning_rate": 4.73000962752732e-06, - "loss": 0.607, + "epoch": 0.36, + "grad_norm": 1.5519185520736019, + "learning_rate": 7.42633154494882e-06, + "loss": 0.516, "step": 5053 }, { - "epoch": 0.53, - "grad_norm": 3.3962133589136707, - "learning_rate": 4.7283078914835e-06, - "loss": 0.6421, + "epoch": 0.36, + "grad_norm": 0.8277417269174736, + "learning_rate": 7.42532668788526e-06, + "loss": 0.4422, "step": 5054 }, { - "epoch": 0.53, - "grad_norm": 2.8324268366863623, - "learning_rate": 4.726606187004084e-06, - "loss": 0.6786, + "epoch": 0.36, + "grad_norm": 1.5931974197039245, + "learning_rate": 7.424321702707726e-06, + "loss": 0.5523, "step": 5055 }, { - "epoch": 0.53, - "grad_norm": 3.009696361121063, - "learning_rate": 4.724904514286773e-06, - "loss": 0.6692, + "epoch": 0.36, + "grad_norm": 1.4981885077819668, + "learning_rate": 7.423316589469305e-06, + "loss": 0.5391, "step": 5056 }, { - "epoch": 0.53, - "grad_norm": 2.6870058183715755, - "learning_rate": 4.723202873529256e-06, - "loss": 0.6067, + "epoch": 0.36, + "grad_norm": 1.86737196040675, + "learning_rate": 7.4223113482230925e-06, + "loss": 0.5243, "step": 5057 }, { - "epoch": 0.53, - "grad_norm": 2.358795360202302, - "learning_rate": 4.721501264929232e-06, - "loss": 0.6416, + "epoch": 0.36, + "grad_norm": 1.5735333110925382, + "learning_rate": 7.421305979022184e-06, + "loss": 0.5844, "step": 5058 }, { - "epoch": 0.53, - "grad_norm": 2.2459402545172984, - "learning_rate": 4.719799688684385e-06, - "loss": 0.7759, + "epoch": 0.36, + "grad_norm": 1.8936972502792857, + "learning_rate": 7.420300481919691e-06, + "loss": 0.5098, "step": 5059 }, { - "epoch": 0.53, - "grad_norm": 2.1705987722229483, - "learning_rate": 4.7180981449924006e-06, - "loss": 0.5818, + "epoch": 0.36, + "grad_norm": 1.7091201250011898, + "learning_rate": 7.419294856968725e-06, + "loss": 0.5512, "step": 5060 }, { - "epoch": 0.53, - "grad_norm": 2.764535131155558, - "learning_rate": 4.716396634050959e-06, - "loss": 0.6444, + "epoch": 0.36, + "grad_norm": 1.6893090494284482, + "learning_rate": 7.418289104222407e-06, + "loss": 0.4997, "step": 5061 }, { - "epoch": 0.53, - "grad_norm": 2.7065511765293717, - "learning_rate": 4.714695156057737e-06, - "loss": 0.7239, + "epoch": 0.36, + "grad_norm": 1.3652340188441736, + "learning_rate": 7.417283223733863e-06, + "loss": 0.5523, "step": 5062 }, { - "epoch": 0.53, - "grad_norm": 2.614545604700766, - "learning_rate": 4.712993711210405e-06, - "loss": 0.5962, + "epoch": 0.36, + "grad_norm": 0.7963990861831172, + "learning_rate": 7.41627721555623e-06, + "loss": 0.4497, "step": 5063 }, { - "epoch": 0.53, - "grad_norm": 3.2589815632805568, - "learning_rate": 4.711292299706636e-06, - "loss": 0.6851, + "epoch": 0.36, + "grad_norm": 2.0385136015943415, + "learning_rate": 7.415271079742645e-06, + "loss": 0.483, "step": 5064 }, { - "epoch": 0.53, - "grad_norm": 2.695743902716769, - "learning_rate": 4.709590921744093e-06, - "loss": 0.5757, + "epoch": 0.36, + "grad_norm": 1.7041267938636007, + "learning_rate": 7.414264816346259e-06, + "loss": 0.6181, "step": 5065 }, { - "epoch": 0.53, - "grad_norm": 3.0665293362792982, - "learning_rate": 4.707889577520436e-06, - "loss": 0.6153, + "epoch": 0.36, + "grad_norm": 1.9583146215173481, + "learning_rate": 7.413258425420226e-06, + "loss": 0.5504, "step": 5066 }, { - "epoch": 0.53, - "grad_norm": 2.6924889301708195, - "learning_rate": 4.706188267233324e-06, - "loss": 0.6596, + "epoch": 0.36, + "grad_norm": 2.083799444491867, + "learning_rate": 7.412251907017703e-06, + "loss": 0.5848, "step": 5067 }, { - "epoch": 0.53, - "grad_norm": 2.3595683992153256, - "learning_rate": 4.704486991080409e-06, - "loss": 0.6294, + "epoch": 0.36, + "grad_norm": 1.4516612646830118, + "learning_rate": 7.411245261191863e-06, + "loss": 0.515, "step": 5068 }, { - "epoch": 0.53, - "grad_norm": 2.669021302249065, - "learning_rate": 4.7027857492593445e-06, - "loss": 0.6792, + "epoch": 0.36, + "grad_norm": 1.861412295888898, + "learning_rate": 7.4102384879958754e-06, + "loss": 0.4733, "step": 5069 }, { - "epoch": 0.53, - "grad_norm": 1.0161808520148305, - "learning_rate": 4.701084541967769e-06, - "loss": 0.5579, + "epoch": 0.36, + "grad_norm": 0.8219853933668342, + "learning_rate": 7.409231587482925e-06, + "loss": 0.4539, "step": 5070 }, { - "epoch": 0.53, - "grad_norm": 2.2988515242704346, - "learning_rate": 4.699383369403329e-06, - "loss": 0.6003, + "epoch": 0.36, + "grad_norm": 1.7464077437141738, + "learning_rate": 7.408224559706199e-06, + "loss": 0.5567, "step": 5071 }, { - "epoch": 0.53, - "grad_norm": 2.252022330411591, - "learning_rate": 4.697682231763658e-06, - "loss": 0.6515, + "epoch": 0.36, + "grad_norm": 0.8413106830065632, + "learning_rate": 7.407217404718891e-06, + "loss": 0.44, "step": 5072 }, { - "epoch": 0.53, - "grad_norm": 2.0233341986752094, - "learning_rate": 4.695981129246393e-06, - "loss": 0.5974, + "epoch": 0.36, + "grad_norm": 1.5787535642085477, + "learning_rate": 7.406210122574203e-06, + "loss": 0.5569, "step": 5073 }, { - "epoch": 0.53, - "grad_norm": 2.2983932146999533, - "learning_rate": 4.694280062049163e-06, - "loss": 0.5946, + "epoch": 0.36, + "grad_norm": 1.8973360122951102, + "learning_rate": 7.4052027133253435e-06, + "loss": 0.5952, "step": 5074 }, { - "epoch": 0.53, - "grad_norm": 3.894694889398997, - "learning_rate": 4.6925790303695886e-06, - "loss": 0.5984, + "epoch": 0.36, + "grad_norm": 1.6898192832409547, + "learning_rate": 7.404195177025526e-06, + "loss": 0.5704, "step": 5075 }, { - "epoch": 0.53, - "grad_norm": 2.374417964315601, - "learning_rate": 4.690878034405296e-06, - "loss": 0.5768, + "epoch": 0.36, + "grad_norm": 1.815609419259841, + "learning_rate": 7.403187513727973e-06, + "loss": 0.5953, "step": 5076 }, { - "epoch": 0.53, - "grad_norm": 2.310052580071245, - "learning_rate": 4.689177074353899e-06, - "loss": 0.559, + "epoch": 0.36, + "grad_norm": 1.595122569914117, + "learning_rate": 7.402179723485912e-06, + "loss": 0.4757, "step": 5077 }, { - "epoch": 0.53, - "grad_norm": 2.4835325819520286, - "learning_rate": 4.687476150413012e-06, - "loss": 0.6051, + "epoch": 0.36, + "grad_norm": 1.7779299929072703, + "learning_rate": 7.401171806352579e-06, + "loss": 0.5509, "step": 5078 }, { - "epoch": 0.53, - "grad_norm": 2.57984204212142, - "learning_rate": 4.6857752627802405e-06, - "loss": 0.5899, + "epoch": 0.36, + "grad_norm": 1.4698184275334876, + "learning_rate": 7.400163762381215e-06, + "loss": 0.5592, "step": 5079 }, { - "epoch": 0.53, - "grad_norm": 2.6143636376389408, - "learning_rate": 4.684074411653192e-06, - "loss": 0.6276, + "epoch": 0.36, + "grad_norm": 5.377279200177793, + "learning_rate": 7.399155591625069e-06, + "loss": 0.6379, "step": 5080 }, { - "epoch": 0.53, - "grad_norm": 2.065161043806273, - "learning_rate": 4.682373597229464e-06, - "loss": 0.6279, + "epoch": 0.36, + "grad_norm": 1.8352221790576715, + "learning_rate": 7.398147294137393e-06, + "loss": 0.6796, "step": 5081 }, { - "epoch": 0.53, - "grad_norm": 4.501763492850309, - "learning_rate": 4.680672819706654e-06, - "loss": 0.681, + "epoch": 0.36, + "grad_norm": 1.9620228817585146, + "learning_rate": 7.397138869971452e-06, + "loss": 0.5192, "step": 5082 }, { - "epoch": 0.53, - "grad_norm": 2.563624459964035, - "learning_rate": 4.678972079282354e-06, - "loss": 0.64, + "epoch": 0.36, + "grad_norm": 1.4711246197982666, + "learning_rate": 7.396130319180514e-06, + "loss": 0.5519, "step": 5083 }, { - "epoch": 0.53, - "grad_norm": 2.0259420193680757, - "learning_rate": 4.677271376154149e-06, - "loss": 0.7082, + "epoch": 0.36, + "grad_norm": 1.6330037349870214, + "learning_rate": 7.395121641817852e-06, + "loss": 0.5479, "step": 5084 }, { - "epoch": 0.54, - "grad_norm": 3.20008658827479, - "learning_rate": 4.6755707105196204e-06, - "loss": 0.7601, + "epoch": 0.36, + "grad_norm": 1.574900373738006, + "learning_rate": 7.394112837936749e-06, + "loss": 0.501, "step": 5085 }, { - "epoch": 0.54, - "grad_norm": 2.5221950538299738, - "learning_rate": 4.673870082576351e-06, - "loss": 0.5972, + "epoch": 0.36, + "grad_norm": 2.9529349124845603, + "learning_rate": 7.393103907590494e-06, + "loss": 0.5827, "step": 5086 }, { - "epoch": 0.54, - "grad_norm": 2.312808929084742, - "learning_rate": 4.672169492521914e-06, - "loss": 0.6986, + "epoch": 0.36, + "grad_norm": 2.3026327734499556, + "learning_rate": 7.392094850832383e-06, + "loss": 0.511, "step": 5087 }, { - "epoch": 0.54, - "grad_norm": 3.5730283833471397, - "learning_rate": 4.670468940553875e-06, - "loss": 0.6108, + "epoch": 0.36, + "grad_norm": 1.4267357343134968, + "learning_rate": 7.391085667715715e-06, + "loss": 0.4688, "step": 5088 }, { - "epoch": 0.54, - "grad_norm": 5.6696729783798485, - "learning_rate": 4.6687684268698034e-06, - "loss": 0.6344, + "epoch": 0.36, + "grad_norm": 1.5429265988950776, + "learning_rate": 7.390076358293801e-06, + "loss": 0.5135, "step": 5089 }, { - "epoch": 0.54, - "grad_norm": 3.465514843046939, - "learning_rate": 4.667067951667256e-06, - "loss": 0.622, + "epoch": 0.36, + "grad_norm": 1.8774359695551555, + "learning_rate": 7.389066922619954e-06, + "loss": 0.5548, "step": 5090 }, { - "epoch": 0.54, - "grad_norm": 1.13181319546967, - "learning_rate": 4.665367515143797e-06, - "loss": 0.5712, + "epoch": 0.36, + "grad_norm": 1.5076159229903583, + "learning_rate": 7.388057360747497e-06, + "loss": 0.5399, "step": 5091 }, { - "epoch": 0.54, - "grad_norm": 2.425783344218305, - "learning_rate": 4.663667117496968e-06, - "loss": 0.6551, + "epoch": 0.36, + "grad_norm": 1.8167313743886233, + "learning_rate": 7.387047672729758e-06, + "loss": 0.6252, "step": 5092 }, { - "epoch": 0.54, - "grad_norm": 2.5727074812927193, - "learning_rate": 4.6619667589243225e-06, - "loss": 0.6365, + "epoch": 0.36, + "grad_norm": 1.660637060186211, + "learning_rate": 7.386037858620074e-06, + "loss": 0.5458, "step": 5093 }, { - "epoch": 0.54, - "grad_norm": 2.3970092435076134, - "learning_rate": 4.6602664396234e-06, - "loss": 0.5804, + "epoch": 0.36, + "grad_norm": 3.0486344464513095, + "learning_rate": 7.385027918471783e-06, + "loss": 0.5139, "step": 5094 }, { - "epoch": 0.54, - "grad_norm": 2.186207872425851, - "learning_rate": 4.658566159791742e-06, - "loss": 0.6546, + "epoch": 0.36, + "grad_norm": 1.6898381578781265, + "learning_rate": 7.384017852338239e-06, + "loss": 0.533, "step": 5095 }, { - "epoch": 0.54, - "grad_norm": 2.655332565894822, - "learning_rate": 4.656865919626883e-06, - "loss": 0.5582, + "epoch": 0.36, + "grad_norm": 1.5892420424942408, + "learning_rate": 7.383007660272792e-06, + "loss": 0.5227, "step": 5096 }, { - "epoch": 0.54, - "grad_norm": 3.041070063381227, - "learning_rate": 4.655165719326347e-06, - "loss": 0.7031, + "epoch": 0.36, + "grad_norm": 5.617944199601051, + "learning_rate": 7.381997342328806e-06, + "loss": 0.4923, "step": 5097 }, { - "epoch": 0.54, - "grad_norm": 1.9879284762088225, - "learning_rate": 4.653465559087661e-06, - "loss": 0.6188, + "epoch": 0.36, + "grad_norm": 1.6975411839023307, + "learning_rate": 7.380986898559648e-06, + "loss": 0.6243, "step": 5098 }, { - "epoch": 0.54, - "grad_norm": 2.362708201617922, - "learning_rate": 4.651765439108344e-06, - "loss": 0.5938, + "epoch": 0.36, + "grad_norm": 2.096827019532785, + "learning_rate": 7.3799763290186945e-06, + "loss": 0.4832, "step": 5099 }, { - "epoch": 0.54, - "grad_norm": 2.3036918192067968, - "learning_rate": 4.650065359585914e-06, - "loss": 0.6185, + "epoch": 0.36, + "grad_norm": 1.8254177421631068, + "learning_rate": 7.378965633759327e-06, + "loss": 0.569, "step": 5100 }, { - "epoch": 0.54, - "grad_norm": 3.862595099667224, - "learning_rate": 4.648365320717876e-06, - "loss": 0.6754, + "epoch": 0.36, + "grad_norm": 1.7065811373509632, + "learning_rate": 7.377954812834933e-06, + "loss": 0.5063, "step": 5101 }, { - "epoch": 0.54, - "grad_norm": 2.3230616471795256, - "learning_rate": 4.64666532270174e-06, - "loss": 0.6647, + "epoch": 0.36, + "grad_norm": 1.5464949360535123, + "learning_rate": 7.3769438662989104e-06, + "loss": 0.5904, "step": 5102 }, { - "epoch": 0.54, - "grad_norm": 2.0507051961729577, - "learning_rate": 4.644965365735004e-06, - "loss": 0.5953, + "epoch": 0.36, + "grad_norm": 1.7067199625771512, + "learning_rate": 7.375932794204657e-06, + "loss": 0.615, "step": 5103 }, { - "epoch": 0.54, - "grad_norm": 2.422141046773203, - "learning_rate": 4.643265450015169e-06, - "loss": 0.7227, + "epoch": 0.36, + "grad_norm": 1.5485577943162878, + "learning_rate": 7.374921596605584e-06, + "loss": 0.5438, "step": 5104 }, { - "epoch": 0.54, - "grad_norm": 2.2097746173914743, - "learning_rate": 4.6415655757397206e-06, - "loss": 0.6461, + "epoch": 0.36, + "grad_norm": 1.6553840929194712, + "learning_rate": 7.373910273555102e-06, + "loss": 0.5805, "step": 5105 }, { - "epoch": 0.54, - "grad_norm": 2.0454571953490075, - "learning_rate": 4.639865743106148e-06, - "loss": 0.5875, + "epoch": 0.36, + "grad_norm": 1.769091854499526, + "learning_rate": 7.372898825106638e-06, + "loss": 0.5515, "step": 5106 }, { - "epoch": 0.54, - "grad_norm": 3.145981246348931, - "learning_rate": 4.63816595231193e-06, - "loss": 0.6611, + "epoch": 0.36, + "grad_norm": 1.7164391576368183, + "learning_rate": 7.371887251313617e-06, + "loss": 0.5663, "step": 5107 }, { - "epoch": 0.54, - "grad_norm": 1.9222457727504096, - "learning_rate": 4.636466203554548e-06, - "loss": 0.5957, + "epoch": 0.36, + "grad_norm": 1.7118583076209484, + "learning_rate": 7.370875552229475e-06, + "loss": 0.6394, "step": 5108 }, { - "epoch": 0.54, - "grad_norm": 2.509150332110932, - "learning_rate": 4.634766497031472e-06, - "loss": 0.6091, + "epoch": 0.36, + "grad_norm": 0.8047624824745919, + "learning_rate": 7.3698637279076515e-06, + "loss": 0.443, "step": 5109 }, { - "epoch": 0.54, - "grad_norm": 2.8618331161141985, - "learning_rate": 4.633066832940167e-06, - "loss": 0.6478, + "epoch": 0.36, + "grad_norm": 0.7870025201378461, + "learning_rate": 7.368851778401597e-06, + "loss": 0.4628, "step": 5110 }, { - "epoch": 0.54, - "grad_norm": 2.3088562094725518, - "learning_rate": 4.631367211478098e-06, - "loss": 0.5967, + "epoch": 0.36, + "grad_norm": 1.7165667211619287, + "learning_rate": 7.367839703764763e-06, + "loss": 0.5545, "step": 5111 }, { - "epoch": 0.54, - "grad_norm": 2.2252841546630666, - "learning_rate": 4.62966763284272e-06, - "loss": 0.703, + "epoch": 0.36, + "grad_norm": 0.7964693908986769, + "learning_rate": 7.366827504050615e-06, + "loss": 0.4579, "step": 5112 }, { - "epoch": 0.54, - "grad_norm": 0.9848922788689293, - "learning_rate": 4.6279680972314875e-06, - "loss": 0.5806, + "epoch": 0.36, + "grad_norm": 0.7648322301693099, + "learning_rate": 7.365815179312615e-06, + "loss": 0.4559, "step": 5113 }, { - "epoch": 0.54, - "grad_norm": 2.421707800583379, - "learning_rate": 4.626268604841844e-06, - "loss": 0.655, + "epoch": 0.36, + "grad_norm": 2.6015047173429493, + "learning_rate": 7.364802729604242e-06, + "loss": 0.4836, "step": 5114 }, { - "epoch": 0.54, - "grad_norm": 3.060075796638142, - "learning_rate": 4.624569155871235e-06, - "loss": 0.6389, + "epoch": 0.36, + "grad_norm": 1.9447296172232715, + "learning_rate": 7.363790154978977e-06, + "loss": 0.5493, "step": 5115 }, { - "epoch": 0.54, - "grad_norm": 2.708021895892409, - "learning_rate": 4.622869750517094e-06, - "loss": 0.7085, + "epoch": 0.36, + "grad_norm": 2.482232716389896, + "learning_rate": 7.362777455490305e-06, + "loss": 0.5862, "step": 5116 }, { - "epoch": 0.54, - "grad_norm": 2.198154906455453, - "learning_rate": 4.621170388976858e-06, - "loss": 0.5702, + "epoch": 0.36, + "grad_norm": 1.66128418816264, + "learning_rate": 7.361764631191723e-06, + "loss": 0.5406, "step": 5117 }, { - "epoch": 0.54, - "grad_norm": 2.074916489895744, - "learning_rate": 4.619471071447949e-06, - "loss": 0.514, + "epoch": 0.36, + "grad_norm": 1.7145440436681805, + "learning_rate": 7.3607516821367295e-06, + "loss": 0.5934, "step": 5118 }, { - "epoch": 0.54, - "grad_norm": 2.89007197368037, - "learning_rate": 4.617771798127792e-06, - "loss": 0.6663, + "epoch": 0.36, + "grad_norm": 1.9539714113278976, + "learning_rate": 7.359738608378835e-06, + "loss": 0.5353, "step": 5119 }, { - "epoch": 0.54, - "grad_norm": 2.186100478989604, - "learning_rate": 4.616072569213802e-06, - "loss": 0.5598, + "epoch": 0.36, + "grad_norm": 0.8316434182820134, + "learning_rate": 7.358725409971547e-06, + "loss": 0.4601, "step": 5120 }, { - "epoch": 0.54, - "grad_norm": 4.737181403903594, - "learning_rate": 4.614373384903391e-06, - "loss": 0.6478, + "epoch": 0.36, + "grad_norm": 2.5510956754206897, + "learning_rate": 7.357712086968394e-06, + "loss": 0.5366, "step": 5121 }, { - "epoch": 0.54, - "grad_norm": 2.744234393991175, - "learning_rate": 4.612674245393967e-06, - "loss": 0.6454, + "epoch": 0.36, + "grad_norm": 1.5913794148353335, + "learning_rate": 7.356698639422898e-06, + "loss": 0.4833, "step": 5122 }, { - "epoch": 0.54, - "grad_norm": 2.4651854631448478, - "learning_rate": 4.610975150882928e-06, - "loss": 0.6289, + "epoch": 0.36, + "grad_norm": 1.751716413720923, + "learning_rate": 7.355685067388595e-06, + "loss": 0.5086, "step": 5123 }, { - "epoch": 0.54, - "grad_norm": 5.106591381662164, - "learning_rate": 4.609276101567672e-06, - "loss": 0.6305, + "epoch": 0.36, + "grad_norm": 1.4944767364572513, + "learning_rate": 7.354671370919024e-06, + "loss": 0.5229, "step": 5124 }, { - "epoch": 0.54, - "grad_norm": 2.8645757518422106, - "learning_rate": 4.607577097645587e-06, - "loss": 0.5743, + "epoch": 0.36, + "grad_norm": 2.470657628710702, + "learning_rate": 7.353657550067733e-06, + "loss": 0.5829, "step": 5125 }, { - "epoch": 0.54, - "grad_norm": 2.8329814893534113, - "learning_rate": 4.605878139314065e-06, - "loss": 0.6463, + "epoch": 0.36, + "grad_norm": 1.7930545104503388, + "learning_rate": 7.352643604888274e-06, + "loss": 0.5526, "step": 5126 }, { - "epoch": 0.54, - "grad_norm": 2.3889289397117075, - "learning_rate": 4.604179226770478e-06, - "loss": 0.7184, + "epoch": 0.36, + "grad_norm": 1.6268312736568769, + "learning_rate": 7.3516295354342096e-06, + "loss": 0.519, "step": 5127 }, { - "epoch": 0.54, - "grad_norm": 2.124562346527691, - "learning_rate": 4.602480360212205e-06, - "loss": 0.6488, + "epoch": 0.36, + "grad_norm": 1.533149796565539, + "learning_rate": 7.350615341759103e-06, + "loss": 0.5274, "step": 5128 }, { - "epoch": 0.54, - "grad_norm": 2.7833987833164624, - "learning_rate": 4.600781539836614e-06, - "loss": 0.6184, + "epoch": 0.36, + "grad_norm": 1.381698644028891, + "learning_rate": 7.34960102391653e-06, + "loss": 0.4644, "step": 5129 }, { - "epoch": 0.54, - "grad_norm": 2.554887684552683, - "learning_rate": 4.5990827658410705e-06, - "loss": 0.6443, + "epoch": 0.36, + "grad_norm": 1.5482866459149196, + "learning_rate": 7.348586581960068e-06, + "loss": 0.5219, "step": 5130 }, { - "epoch": 0.54, - "grad_norm": 2.83397128521315, - "learning_rate": 4.597384038422933e-06, - "loss": 0.6566, + "epoch": 0.36, + "grad_norm": 1.7753605855524777, + "learning_rate": 7.347572015943307e-06, + "loss": 0.5642, "step": 5131 }, { - "epoch": 0.54, - "grad_norm": 2.322027615656651, - "learning_rate": 4.595685357779553e-06, - "loss": 0.6787, + "epoch": 0.36, + "grad_norm": 1.854941836258718, + "learning_rate": 7.3465573259198365e-06, + "loss": 0.5447, "step": 5132 }, { - "epoch": 0.54, - "grad_norm": 4.634011797281909, - "learning_rate": 4.593986724108279e-06, - "loss": 0.5787, + "epoch": 0.36, + "grad_norm": 2.2452714700288623, + "learning_rate": 7.345542511943257e-06, + "loss": 0.6431, "step": 5133 }, { - "epoch": 0.54, - "grad_norm": 1.0666278509393818, - "learning_rate": 4.592288137606454e-06, - "loss": 0.5666, + "epoch": 0.36, + "grad_norm": 1.6501921438272449, + "learning_rate": 7.344527574067174e-06, + "loss": 0.561, "step": 5134 }, { - "epoch": 0.54, - "grad_norm": 3.207516428553423, - "learning_rate": 4.590589598471416e-06, - "loss": 0.6368, + "epoch": 0.36, + "grad_norm": 1.668503088340659, + "learning_rate": 7.343512512345199e-06, + "loss": 0.5308, "step": 5135 }, { - "epoch": 0.54, - "grad_norm": 3.49123455675996, - "learning_rate": 4.588891106900493e-06, - "loss": 0.7018, + "epoch": 0.36, + "grad_norm": 1.5177840970476002, + "learning_rate": 7.342497326830953e-06, + "loss": 0.5094, "step": 5136 }, { - "epoch": 0.54, - "grad_norm": 0.9847912096139533, - "learning_rate": 4.587192663091014e-06, - "loss": 0.5966, + "epoch": 0.36, + "grad_norm": 1.6522433800671026, + "learning_rate": 7.341482017578061e-06, + "loss": 0.5108, "step": 5137 }, { - "epoch": 0.54, - "grad_norm": 3.22624018717781, - "learning_rate": 4.5854942672402965e-06, - "loss": 0.582, + "epoch": 0.36, + "grad_norm": 1.6250208218789302, + "learning_rate": 7.3404665846401555e-06, + "loss": 0.5351, "step": 5138 }, { - "epoch": 0.54, - "grad_norm": 1.9077695725913995, - "learning_rate": 4.5837959195456605e-06, - "loss": 0.6262, + "epoch": 0.36, + "grad_norm": 1.7921397949737752, + "learning_rate": 7.339451028070873e-06, + "loss": 0.5419, "step": 5139 }, { - "epoch": 0.54, - "grad_norm": 2.5077328392848472, - "learning_rate": 4.5820976202044085e-06, - "loss": 0.678, + "epoch": 0.36, + "grad_norm": 2.1160619574622057, + "learning_rate": 7.338435347923861e-06, + "loss": 0.4881, "step": 5140 }, { - "epoch": 0.54, - "grad_norm": 2.107242753594195, - "learning_rate": 4.580399369413847e-06, - "loss": 0.5716, + "epoch": 0.36, + "grad_norm": 1.6440051312739723, + "learning_rate": 7.337419544252771e-06, + "loss": 0.55, "step": 5141 }, { - "epoch": 0.54, - "grad_norm": 2.2886530499565723, - "learning_rate": 4.578701167371274e-06, - "loss": 0.6729, + "epoch": 0.36, + "grad_norm": 1.4087838815695306, + "learning_rate": 7.336403617111258e-06, + "loss": 0.4658, "step": 5142 }, { - "epoch": 0.54, - "grad_norm": 1.806852048189923, - "learning_rate": 4.577003014273981e-06, - "loss": 0.6226, + "epoch": 0.36, + "grad_norm": 2.0091094500961226, + "learning_rate": 7.335387566552991e-06, + "loss": 0.5263, "step": 5143 }, { - "epoch": 0.54, - "grad_norm": 2.0468082920924573, - "learning_rate": 4.575304910319257e-06, - "loss": 0.7215, + "epoch": 0.37, + "grad_norm": 1.6479128289239722, + "learning_rate": 7.334371392631638e-06, + "loss": 0.575, "step": 5144 }, { - "epoch": 0.54, - "grad_norm": 2.3451100058055863, - "learning_rate": 4.573606855704379e-06, - "loss": 0.5165, + "epoch": 0.37, + "grad_norm": 2.407416666329187, + "learning_rate": 7.333355095400879e-06, + "loss": 0.548, "step": 5145 }, { - "epoch": 0.54, - "grad_norm": 2.122583359383461, - "learning_rate": 4.571908850626625e-06, - "loss": 0.558, + "epoch": 0.37, + "grad_norm": 3.282268061927636, + "learning_rate": 7.332338674914398e-06, + "loss": 0.5505, "step": 5146 }, { - "epoch": 0.54, - "grad_norm": 2.1021680512947545, - "learning_rate": 4.570210895283262e-06, - "loss": 0.5882, + "epoch": 0.37, + "grad_norm": 1.6932025316255659, + "learning_rate": 7.3313221312258846e-06, + "loss": 0.5556, "step": 5147 }, { - "epoch": 0.54, - "grad_norm": 2.613254013725175, - "learning_rate": 4.568512989871557e-06, - "loss": 0.627, + "epoch": 0.37, + "grad_norm": 1.6795663400087641, + "learning_rate": 7.330305464389036e-06, + "loss": 0.5455, "step": 5148 }, { - "epoch": 0.54, - "grad_norm": 1.063740812597954, - "learning_rate": 4.566815134588763e-06, - "loss": 0.5857, + "epoch": 0.37, + "grad_norm": 1.497900612118339, + "learning_rate": 7.3292886744575575e-06, + "loss": 0.4593, "step": 5149 }, { - "epoch": 0.54, - "grad_norm": 2.3292145678287235, - "learning_rate": 4.565117329632137e-06, - "loss": 0.5244, + "epoch": 0.37, + "grad_norm": 1.6561951405207849, + "learning_rate": 7.328271761485159e-06, + "loss": 0.5432, "step": 5150 }, { - "epoch": 0.54, - "grad_norm": 2.2621172262271285, - "learning_rate": 4.5634195751989195e-06, - "loss": 0.6286, + "epoch": 0.37, + "grad_norm": 1.4652624060160537, + "learning_rate": 7.327254725525554e-06, + "loss": 0.5867, "step": 5151 }, { - "epoch": 0.54, - "grad_norm": 2.305527091957668, - "learning_rate": 4.561721871486357e-06, - "loss": 0.6073, + "epoch": 0.37, + "grad_norm": 3.040675410734751, + "learning_rate": 7.326237566632471e-06, + "loss": 0.5485, "step": 5152 }, { - "epoch": 0.54, - "grad_norm": 2.664097973486397, - "learning_rate": 4.5600242186916786e-06, - "loss": 0.7036, + "epoch": 0.37, + "grad_norm": 0.8719122593132272, + "learning_rate": 7.325220284859638e-06, + "loss": 0.4657, "step": 5153 }, { - "epoch": 0.54, - "grad_norm": 3.907247212421276, - "learning_rate": 4.5583266170121155e-06, - "loss": 0.6086, + "epoch": 0.37, + "grad_norm": 2.9913566725087914, + "learning_rate": 7.324202880260789e-06, + "loss": 0.5088, "step": 5154 }, { - "epoch": 0.54, - "grad_norm": 3.356425324401827, - "learning_rate": 4.556629066644888e-06, - "loss": 0.5769, + "epoch": 0.37, + "grad_norm": 1.6366908610435897, + "learning_rate": 7.32318535288967e-06, + "loss": 0.5485, "step": 5155 }, { - "epoch": 0.54, - "grad_norm": 3.328669215805127, - "learning_rate": 4.554931567787214e-06, - "loss": 0.6026, + "epoch": 0.37, + "grad_norm": 1.5070794050701657, + "learning_rate": 7.322167702800027e-06, + "loss": 0.4738, "step": 5156 }, { - "epoch": 0.54, - "grad_norm": 2.2570183445717817, - "learning_rate": 4.553234120636306e-06, - "loss": 0.6124, + "epoch": 0.37, + "grad_norm": 1.601711919125155, + "learning_rate": 7.321149930045617e-06, + "loss": 0.599, "step": 5157 }, { - "epoch": 0.54, - "grad_norm": 3.081027926632191, - "learning_rate": 4.551536725389364e-06, - "loss": 0.6321, + "epoch": 0.37, + "grad_norm": 1.4274056985595942, + "learning_rate": 7.3201320346802026e-06, + "loss": 0.528, "step": 5158 }, { - "epoch": 0.54, - "grad_norm": 2.273359671518665, - "learning_rate": 4.54983938224359e-06, - "loss": 0.6413, + "epoch": 0.37, + "grad_norm": 1.7363807187178635, + "learning_rate": 7.319114016757555e-06, + "loss": 0.5034, "step": 5159 }, { - "epoch": 0.54, - "grad_norm": 2.9426125776938763, - "learning_rate": 4.5481420913961734e-06, - "loss": 0.6623, + "epoch": 0.37, + "grad_norm": 1.479588039668636, + "learning_rate": 7.318095876331445e-06, + "loss": 0.5594, "step": 5160 }, { - "epoch": 0.54, - "grad_norm": 2.272805905164049, - "learning_rate": 4.546444853044308e-06, - "loss": 0.6284, + "epoch": 0.37, + "grad_norm": 1.707592600216401, + "learning_rate": 7.317077613455656e-06, + "loss": 0.5339, "step": 5161 }, { - "epoch": 0.54, - "grad_norm": 4.032007123280218, - "learning_rate": 4.544747667385163e-06, - "loss": 0.671, + "epoch": 0.37, + "grad_norm": 2.026485083570643, + "learning_rate": 7.316059228183976e-06, + "loss": 0.4725, "step": 5162 }, { - "epoch": 0.54, - "grad_norm": 2.705544234308645, - "learning_rate": 4.543050534615919e-06, - "loss": 0.6616, + "epoch": 0.37, + "grad_norm": 1.5366769684740051, + "learning_rate": 7.3150407205702e-06, + "loss": 0.5081, "step": 5163 }, { - "epoch": 0.54, - "grad_norm": 2.3270348164118886, - "learning_rate": 4.541353454933743e-06, - "loss": 0.6169, + "epoch": 0.37, + "grad_norm": 1.5441804151086802, + "learning_rate": 7.3140220906681295e-06, + "loss": 0.539, "step": 5164 }, { - "epoch": 0.54, - "grad_norm": 3.1781706730214743, - "learning_rate": 4.539656428535799e-06, - "loss": 0.6087, + "epoch": 0.37, + "grad_norm": 1.6583269138089602, + "learning_rate": 7.313003338531569e-06, + "loss": 0.519, "step": 5165 }, { - "epoch": 0.54, - "grad_norm": 2.157315390560079, - "learning_rate": 4.537959455619238e-06, - "loss": 0.5943, + "epoch": 0.37, + "grad_norm": 1.9048494192787009, + "learning_rate": 7.311984464214337e-06, + "loss": 0.6251, "step": 5166 }, { - "epoch": 0.54, - "grad_norm": 2.909391157704517, - "learning_rate": 4.536262536381213e-06, - "loss": 0.6192, + "epoch": 0.37, + "grad_norm": 1.4915246845122918, + "learning_rate": 7.31096546777025e-06, + "loss": 0.5814, "step": 5167 }, { - "epoch": 0.54, - "grad_norm": 2.3877838658782027, - "learning_rate": 4.5345656710188645e-06, - "loss": 0.6268, + "epoch": 0.37, + "grad_norm": 1.4534310573023528, + "learning_rate": 7.309946349253138e-06, + "loss": 0.5034, "step": 5168 }, { - "epoch": 0.54, - "grad_norm": 2.8840280487338967, - "learning_rate": 4.532868859729333e-06, - "loss": 0.6004, + "epoch": 0.37, + "grad_norm": 2.0594336362128467, + "learning_rate": 7.30892710871683e-06, + "loss": 0.544, "step": 5169 }, { - "epoch": 0.54, - "grad_norm": 2.113177902066499, - "learning_rate": 4.531172102709746e-06, - "loss": 0.6332, + "epoch": 0.37, + "grad_norm": 2.0545186429146343, + "learning_rate": 7.307907746215172e-06, + "loss": 0.5242, "step": 5170 }, { - "epoch": 0.54, - "grad_norm": 2.0427183759210084, - "learning_rate": 4.529475400157228e-06, - "loss": 0.6404, + "epoch": 0.37, + "grad_norm": 0.9532472089213389, + "learning_rate": 7.306888261802003e-06, + "loss": 0.4698, "step": 5171 }, { - "epoch": 0.54, - "grad_norm": 2.552191146094726, - "learning_rate": 4.527778752268899e-06, - "loss": 0.6292, + "epoch": 0.37, + "grad_norm": 1.4808066538116278, + "learning_rate": 7.305868655531181e-06, + "loss": 0.4775, "step": 5172 }, { - "epoch": 0.54, - "grad_norm": 2.3830504963095254, - "learning_rate": 4.5260821592418685e-06, - "loss": 0.6213, + "epoch": 0.37, + "grad_norm": 1.7245191819532661, + "learning_rate": 7.304848927456563e-06, + "loss": 0.5338, "step": 5173 }, { - "epoch": 0.54, - "grad_norm": 5.240778096163022, - "learning_rate": 4.5243856212732466e-06, - "loss": 0.5673, + "epoch": 0.37, + "grad_norm": 1.7539730374032696, + "learning_rate": 7.303829077632015e-06, + "loss": 0.5417, "step": 5174 }, { - "epoch": 0.54, - "grad_norm": 1.0053178425031761, - "learning_rate": 4.5226891385601235e-06, - "loss": 0.5822, + "epoch": 0.37, + "grad_norm": 2.062984962502777, + "learning_rate": 7.302809106111408e-06, + "loss": 0.6039, "step": 5175 }, { - "epoch": 0.54, - "grad_norm": 2.4335630650323252, - "learning_rate": 4.520992711299599e-06, - "loss": 0.6692, + "epoch": 0.37, + "grad_norm": 2.0286543379729762, + "learning_rate": 7.301789012948622e-06, + "loss": 0.5192, "step": 5176 }, { - "epoch": 0.54, - "grad_norm": 2.058047240249313, - "learning_rate": 4.519296339688754e-06, - "loss": 0.5852, + "epoch": 0.37, + "grad_norm": 1.9233514131753247, + "learning_rate": 7.300768798197541e-06, + "loss": 0.505, "step": 5177 }, { - "epoch": 0.54, - "grad_norm": 3.063974457211975, - "learning_rate": 4.517600023924673e-06, - "loss": 0.6141, + "epoch": 0.37, + "grad_norm": 1.8666881431773925, + "learning_rate": 7.299748461912054e-06, + "loss": 0.5427, "step": 5178 }, { - "epoch": 0.54, - "grad_norm": 2.1386266960462503, - "learning_rate": 4.515903764204428e-06, - "loss": 0.6138, + "epoch": 0.37, + "grad_norm": 5.655594815968916, + "learning_rate": 7.298728004146062e-06, + "loss": 0.5977, "step": 5179 }, { - "epoch": 0.55, - "grad_norm": 2.5711607873760527, - "learning_rate": 4.514207560725082e-06, - "loss": 0.6987, + "epoch": 0.37, + "grad_norm": 0.8678658919741666, + "learning_rate": 7.297707424953467e-06, + "loss": 0.4475, "step": 5180 }, { - "epoch": 0.55, - "grad_norm": 1.979892923073177, - "learning_rate": 4.512511413683698e-06, - "loss": 0.6426, + "epoch": 0.37, + "grad_norm": 1.6001194034820867, + "learning_rate": 7.296686724388181e-06, + "loss": 0.5428, "step": 5181 }, { - "epoch": 0.55, - "grad_norm": 2.1211499569858487, - "learning_rate": 4.510815323277329e-06, - "loss": 0.6801, + "epoch": 0.37, + "grad_norm": 2.698882123735311, + "learning_rate": 7.29566590250412e-06, + "loss": 0.5413, "step": 5182 }, { - "epoch": 0.55, - "grad_norm": 2.415322447097737, - "learning_rate": 4.509119289703023e-06, - "loss": 0.5961, + "epoch": 0.37, + "grad_norm": 1.9559582339747006, + "learning_rate": 7.294644959355208e-06, + "loss": 0.5643, "step": 5183 }, { - "epoch": 0.55, - "grad_norm": 2.4841757246588174, - "learning_rate": 4.507423313157815e-06, - "loss": 0.6813, + "epoch": 0.37, + "grad_norm": 3.1659993023250506, + "learning_rate": 7.293623894995372e-06, + "loss": 0.5764, "step": 5184 }, { - "epoch": 0.55, - "grad_norm": 2.409140496949704, - "learning_rate": 4.505727393838746e-06, - "loss": 0.6362, + "epoch": 0.37, + "grad_norm": 1.621399765210913, + "learning_rate": 7.2926027094785514e-06, + "loss": 0.5669, "step": 5185 }, { - "epoch": 0.55, - "grad_norm": 2.3420213528636924, - "learning_rate": 4.504031531942837e-06, - "loss": 0.6293, + "epoch": 0.37, + "grad_norm": 1.7723799861440888, + "learning_rate": 7.291581402858687e-06, + "loss": 0.5573, "step": 5186 }, { - "epoch": 0.55, - "grad_norm": 2.454227338659623, - "learning_rate": 4.502335727667114e-06, - "loss": 0.6149, + "epoch": 0.37, + "grad_norm": 1.897439195656029, + "learning_rate": 7.290559975189727e-06, + "loss": 0.5726, "step": 5187 }, { - "epoch": 0.55, - "grad_norm": 2.7418078304273394, - "learning_rate": 4.500639981208586e-06, - "loss": 0.6847, + "epoch": 0.37, + "grad_norm": 1.906686590248555, + "learning_rate": 7.2895384265256285e-06, + "loss": 0.5004, "step": 5188 }, { - "epoch": 0.55, - "grad_norm": 1.9687799092010179, - "learning_rate": 4.498944292764261e-06, - "loss": 0.6455, + "epoch": 0.37, + "grad_norm": 1.5776828158401848, + "learning_rate": 7.288516756920353e-06, + "loss": 0.5374, "step": 5189 }, { - "epoch": 0.55, - "grad_norm": 2.3321151600204844, - "learning_rate": 4.497248662531139e-06, - "loss": 0.605, + "epoch": 0.37, + "grad_norm": 1.50257283572745, + "learning_rate": 7.287494966427866e-06, + "loss": 0.5972, "step": 5190 }, { - "epoch": 0.55, - "grad_norm": 3.4606611035859682, - "learning_rate": 4.495553090706216e-06, - "loss": 0.6776, + "epoch": 0.37, + "grad_norm": 1.6084992642095253, + "learning_rate": 7.286473055102146e-06, + "loss": 0.4864, "step": 5191 }, { - "epoch": 0.55, - "grad_norm": 2.3731291130732086, - "learning_rate": 4.493857577486477e-06, - "loss": 0.6971, + "epoch": 0.37, + "grad_norm": 1.48636244380556, + "learning_rate": 7.28545102299717e-06, + "loss": 0.5508, "step": 5192 }, { - "epoch": 0.55, - "grad_norm": 2.041114475320568, - "learning_rate": 4.492162123068899e-06, - "loss": 0.5753, + "epoch": 0.37, + "grad_norm": 1.9886120169376162, + "learning_rate": 7.2844288701669265e-06, + "loss": 0.5642, "step": 5193 }, { - "epoch": 0.55, - "grad_norm": 2.5283267101737827, - "learning_rate": 4.49046672765046e-06, - "loss": 0.6632, + "epoch": 0.37, + "grad_norm": 1.6116234406067413, + "learning_rate": 7.28340659666541e-06, + "loss": 0.6063, "step": 5194 }, { - "epoch": 0.55, - "grad_norm": 2.535327625572618, - "learning_rate": 4.488771391428122e-06, - "loss": 0.7297, + "epoch": 0.37, + "grad_norm": 1.4642668753608847, + "learning_rate": 7.282384202546619e-06, + "loss": 0.4531, "step": 5195 }, { - "epoch": 0.55, - "grad_norm": 2.3383531443300956, - "learning_rate": 4.487076114598848e-06, - "loss": 0.5403, + "epoch": 0.37, + "grad_norm": 1.6667347374187635, + "learning_rate": 7.281361687864563e-06, + "loss": 0.5795, "step": 5196 }, { - "epoch": 0.55, - "grad_norm": 2.1712529025180665, - "learning_rate": 4.485380897359587e-06, - "loss": 0.658, + "epoch": 0.37, + "grad_norm": 1.6105996373018179, + "learning_rate": 7.280339052673251e-06, + "loss": 0.5433, "step": 5197 }, { - "epoch": 0.55, - "grad_norm": 2.2880236706230876, - "learning_rate": 4.483685739907285e-06, - "loss": 0.6671, + "epoch": 0.37, + "grad_norm": 1.66798304950578, + "learning_rate": 7.279316297026704e-06, + "loss": 0.5584, "step": 5198 }, { - "epoch": 0.55, - "grad_norm": 2.923715631742324, - "learning_rate": 4.481990642438881e-06, - "loss": 0.6322, + "epoch": 0.37, + "grad_norm": 2.0345685507203704, + "learning_rate": 7.278293420978946e-06, + "loss": 0.4837, "step": 5199 }, { - "epoch": 0.55, - "grad_norm": 2.5156565772915966, - "learning_rate": 4.480295605151308e-06, - "loss": 0.779, + "epoch": 0.37, + "grad_norm": 1.6415103640630697, + "learning_rate": 7.27727042458401e-06, + "loss": 0.568, "step": 5200 }, { - "epoch": 0.55, - "grad_norm": 2.279437155096648, - "learning_rate": 4.47860062824149e-06, - "loss": 0.5941, + "epoch": 0.37, + "grad_norm": 1.5920921863874502, + "learning_rate": 7.276247307895933e-06, + "loss": 0.5206, "step": 5201 }, { - "epoch": 0.55, - "grad_norm": 2.8210671148826783, - "learning_rate": 4.4769057119063425e-06, - "loss": 0.6129, + "epoch": 0.37, + "grad_norm": 1.5565344878285035, + "learning_rate": 7.2752240709687606e-06, + "loss": 0.509, "step": 5202 }, { - "epoch": 0.55, - "grad_norm": 2.1034516568430606, - "learning_rate": 4.475210856342777e-06, - "loss": 0.655, + "epoch": 0.37, + "grad_norm": 2.0289398979663185, + "learning_rate": 7.274200713856543e-06, + "loss": 0.5374, "step": 5203 }, { - "epoch": 0.55, - "grad_norm": 2.1311538275721205, - "learning_rate": 4.473516061747697e-06, - "loss": 0.537, + "epoch": 0.37, + "grad_norm": 1.8414926599175903, + "learning_rate": 7.273177236613338e-06, + "loss": 0.4828, "step": 5204 }, { - "epoch": 0.55, - "grad_norm": 0.9788330045930997, - "learning_rate": 4.471821328318001e-06, - "loss": 0.5896, + "epoch": 0.37, + "grad_norm": 1.4859337274039752, + "learning_rate": 7.2721536392932094e-06, + "loss": 0.4841, "step": 5205 }, { - "epoch": 0.55, - "grad_norm": 3.8737593347024135, - "learning_rate": 4.470126656250574e-06, - "loss": 0.5861, + "epoch": 0.37, + "grad_norm": 1.8127811820136075, + "learning_rate": 7.2711299219502254e-06, + "loss": 0.508, "step": 5206 }, { - "epoch": 0.55, - "grad_norm": 2.5511717033127996, - "learning_rate": 4.468432045742301e-06, - "loss": 0.5941, + "epoch": 0.37, + "grad_norm": 1.6610388305791963, + "learning_rate": 7.270106084638464e-06, + "loss": 0.5156, "step": 5207 }, { - "epoch": 0.55, - "grad_norm": 2.9074504611235072, - "learning_rate": 4.466737496990057e-06, - "loss": 0.6367, + "epoch": 0.37, + "grad_norm": 1.6731451004252327, + "learning_rate": 7.269082127412006e-06, + "loss": 0.5726, "step": 5208 }, { - "epoch": 0.55, - "grad_norm": 3.069907121072369, - "learning_rate": 4.46504301019071e-06, - "loss": 0.6646, + "epoch": 0.37, + "grad_norm": 1.7726816874005962, + "learning_rate": 7.268058050324942e-06, + "loss": 0.5217, "step": 5209 }, { - "epoch": 0.55, - "grad_norm": 2.6033731154873, - "learning_rate": 4.463348585541117e-06, - "loss": 0.6088, + "epoch": 0.37, + "grad_norm": 1.5539592932620485, + "learning_rate": 7.267033853431366e-06, + "loss": 0.5785, "step": 5210 }, { - "epoch": 0.55, - "grad_norm": 2.188979272302636, - "learning_rate": 4.461654223238136e-06, - "loss": 0.5968, + "epoch": 0.37, + "grad_norm": 3.080811855131007, + "learning_rate": 7.266009536785381e-06, + "loss": 0.5527, "step": 5211 }, { - "epoch": 0.55, - "grad_norm": 2.743951094727751, - "learning_rate": 4.459959923478609e-06, - "loss": 0.6687, + "epoch": 0.37, + "grad_norm": 1.6077367704485828, + "learning_rate": 7.264985100441093e-06, + "loss": 0.5105, "step": 5212 }, { - "epoch": 0.55, - "grad_norm": 2.379573949144014, - "learning_rate": 4.45826568645938e-06, - "loss": 0.6304, + "epoch": 0.37, + "grad_norm": 1.6333428742650145, + "learning_rate": 7.26396054445262e-06, + "loss": 0.5053, "step": 5213 }, { - "epoch": 0.55, - "grad_norm": 2.454626913642241, - "learning_rate": 4.456571512377277e-06, - "loss": 0.601, + "epoch": 0.37, + "grad_norm": 1.8158261170057446, + "learning_rate": 7.262935868874077e-06, + "loss": 0.5936, "step": 5214 }, { - "epoch": 0.55, - "grad_norm": 2.822548767237185, - "learning_rate": 4.454877401429123e-06, - "loss": 0.5249, + "epoch": 0.37, + "grad_norm": 1.9430533592277475, + "learning_rate": 7.261911073759594e-06, + "loss": 0.518, "step": 5215 }, { - "epoch": 0.55, - "grad_norm": 2.6657172122549992, - "learning_rate": 4.453183353811737e-06, - "loss": 0.5839, + "epoch": 0.37, + "grad_norm": 1.6560366663261452, + "learning_rate": 7.260886159163304e-06, + "loss": 0.5179, "step": 5216 }, { - "epoch": 0.55, - "grad_norm": 1.3013033806129486, - "learning_rate": 4.45148936972193e-06, - "loss": 0.5733, + "epoch": 0.37, + "grad_norm": 1.6113097369922826, + "learning_rate": 7.259861125139345e-06, + "loss": 0.4799, "step": 5217 }, { - "epoch": 0.55, - "grad_norm": 2.2066422534371846, - "learning_rate": 4.449795449356502e-06, - "loss": 0.6463, + "epoch": 0.37, + "grad_norm": 1.8758405369358333, + "learning_rate": 7.2588359717418645e-06, + "loss": 0.5628, "step": 5218 }, { - "epoch": 0.55, - "grad_norm": 2.360497161476003, - "learning_rate": 4.4481015929122465e-06, - "loss": 0.6128, + "epoch": 0.37, + "grad_norm": 1.9516202428066758, + "learning_rate": 7.257810699025016e-06, + "loss": 0.5389, "step": 5219 }, { - "epoch": 0.55, - "grad_norm": 2.5466902571725396, - "learning_rate": 4.446407800585954e-06, - "loss": 0.6788, + "epoch": 0.37, + "grad_norm": 1.883040970444659, + "learning_rate": 7.256785307042953e-06, + "loss": 0.6298, "step": 5220 }, { - "epoch": 0.55, - "grad_norm": 7.202579197824117, - "learning_rate": 4.444714072574401e-06, - "loss": 0.6787, + "epoch": 0.37, + "grad_norm": 1.7351678902081595, + "learning_rate": 7.255759795849845e-06, + "loss": 0.5768, "step": 5221 }, { - "epoch": 0.55, - "grad_norm": 3.010123101753715, - "learning_rate": 4.443020409074365e-06, - "loss": 0.7269, + "epoch": 0.37, + "grad_norm": 1.6121768885000411, + "learning_rate": 7.254734165499861e-06, + "loss": 0.6096, "step": 5222 }, { - "epoch": 0.55, - "grad_norm": 2.8939895157613513, - "learning_rate": 4.441326810282606e-06, - "loss": 0.7057, + "epoch": 0.37, + "grad_norm": 1.6629156069350473, + "learning_rate": 7.253708416047176e-06, + "loss": 0.5844, "step": 5223 }, { - "epoch": 0.55, - "grad_norm": 2.7291258805819063, - "learning_rate": 4.4396332763958835e-06, - "loss": 0.6379, + "epoch": 0.37, + "grad_norm": 0.8233515788424914, + "learning_rate": 7.2526825475459775e-06, + "loss": 0.4604, "step": 5224 }, { - "epoch": 0.55, - "grad_norm": 2.8152570916653485, - "learning_rate": 4.437939807610947e-06, - "loss": 0.6389, + "epoch": 0.37, + "grad_norm": 0.7696184197121986, + "learning_rate": 7.251656560050454e-06, + "loss": 0.4621, "step": 5225 }, { - "epoch": 0.55, - "grad_norm": 4.863829933630771, - "learning_rate": 4.436246404124539e-06, - "loss": 0.592, + "epoch": 0.37, + "grad_norm": 1.6466590367571898, + "learning_rate": 7.250630453614801e-06, + "loss": 0.5785, "step": 5226 }, { - "epoch": 0.55, - "grad_norm": 1.9228368391092252, - "learning_rate": 4.4345530661333955e-06, - "loss": 0.6296, + "epoch": 0.37, + "grad_norm": 1.4485095219676252, + "learning_rate": 7.249604228293221e-06, + "loss": 0.5328, "step": 5227 }, { - "epoch": 0.55, - "grad_norm": 2.4158924275798084, - "learning_rate": 4.432859793834239e-06, - "loss": 0.582, + "epoch": 0.37, + "grad_norm": 3.7628713724490788, + "learning_rate": 7.248577884139923e-06, + "loss": 0.4874, "step": 5228 }, { - "epoch": 0.55, - "grad_norm": 2.572611497694934, - "learning_rate": 4.431166587423794e-06, - "loss": 0.6169, + "epoch": 0.37, + "grad_norm": 1.6872449909904645, + "learning_rate": 7.247551421209123e-06, + "loss": 0.5238, "step": 5229 }, { - "epoch": 0.55, - "grad_norm": 2.268729295515817, - "learning_rate": 4.42947344709877e-06, - "loss": 0.6666, + "epoch": 0.37, + "grad_norm": 1.6199210807446809, + "learning_rate": 7.24652483955504e-06, + "loss": 0.5388, "step": 5230 }, { - "epoch": 0.55, - "grad_norm": 2.0952795027263966, - "learning_rate": 4.4277803730558746e-06, - "loss": 0.6486, + "epoch": 0.37, + "grad_norm": 1.8309753134943307, + "learning_rate": 7.245498139231902e-06, + "loss": 0.5703, "step": 5231 }, { - "epoch": 0.55, - "grad_norm": 1.0968523158920367, - "learning_rate": 4.426087365491798e-06, - "loss": 0.5293, + "epoch": 0.37, + "grad_norm": 2.683508779094761, + "learning_rate": 7.244471320293945e-06, + "loss": 0.498, "step": 5232 }, { - "epoch": 0.55, - "grad_norm": 2.088438078890414, - "learning_rate": 4.424394424603234e-06, - "loss": 0.5566, + "epoch": 0.37, + "grad_norm": 1.9454475205644481, + "learning_rate": 7.243444382795407e-06, + "loss": 0.6502, "step": 5233 }, { - "epoch": 0.55, - "grad_norm": 2.1632948290431497, - "learning_rate": 4.42270155058686e-06, - "loss": 0.6262, + "epoch": 0.37, + "grad_norm": 1.3791954575291987, + "learning_rate": 7.2424173267905365e-06, + "loss": 0.5397, "step": 5234 }, { - "epoch": 0.55, - "grad_norm": 0.9284998520052347, - "learning_rate": 4.421008743639353e-06, - "loss": 0.5875, + "epoch": 0.37, + "grad_norm": 1.8238408320863007, + "learning_rate": 7.241390152333582e-06, + "loss": 0.4952, "step": 5235 }, { - "epoch": 0.55, - "grad_norm": 2.0523984063768803, - "learning_rate": 4.419316003957376e-06, - "loss": 0.7087, + "epoch": 0.37, + "grad_norm": 1.604277423795181, + "learning_rate": 7.240362859478807e-06, + "loss": 0.6029, "step": 5236 }, { - "epoch": 0.55, - "grad_norm": 2.443819742443756, - "learning_rate": 4.417623331737587e-06, - "loss": 0.6464, + "epoch": 0.37, + "grad_norm": 1.664031092516312, + "learning_rate": 7.239335448280472e-06, + "loss": 0.6022, "step": 5237 }, { - "epoch": 0.55, - "grad_norm": 2.0896384959029604, - "learning_rate": 4.415930727176634e-06, - "loss": 0.5857, + "epoch": 0.37, + "grad_norm": 1.4206883457301, + "learning_rate": 7.2383079187928516e-06, + "loss": 0.5197, "step": 5238 }, { - "epoch": 0.55, - "grad_norm": 2.230344099608812, - "learning_rate": 4.414238190471163e-06, - "loss": 0.6313, + "epoch": 0.37, + "grad_norm": 1.56805527990652, + "learning_rate": 7.237280271070223e-06, + "loss": 0.5188, "step": 5239 }, { - "epoch": 0.55, - "grad_norm": 3.3364275587007395, - "learning_rate": 4.412545721817806e-06, - "loss": 0.7127, + "epoch": 0.37, + "grad_norm": 1.615523817437708, + "learning_rate": 7.236252505166869e-06, + "loss": 0.5886, "step": 5240 }, { - "epoch": 0.55, - "grad_norm": 1.9806871576287524, - "learning_rate": 4.410853321413187e-06, - "loss": 0.6476, + "epoch": 0.37, + "grad_norm": 1.6228669953749348, + "learning_rate": 7.2352246211370805e-06, + "loss": 0.5459, "step": 5241 }, { - "epoch": 0.55, - "grad_norm": 2.5890707975835805, - "learning_rate": 4.409160989453927e-06, - "loss": 0.6181, + "epoch": 0.37, + "grad_norm": 1.494290237242261, + "learning_rate": 7.234196619035153e-06, + "loss": 0.5083, "step": 5242 }, { - "epoch": 0.55, - "grad_norm": 3.0789313597402534, - "learning_rate": 4.407468726136634e-06, - "loss": 0.637, + "epoch": 0.37, + "grad_norm": 1.6584302627318794, + "learning_rate": 7.2331684989153905e-06, + "loss": 0.5316, "step": 5243 }, { - "epoch": 0.55, - "grad_norm": 2.1476761639567976, - "learning_rate": 4.405776531657916e-06, - "loss": 0.6924, + "epoch": 0.37, + "grad_norm": 1.443592095260721, + "learning_rate": 7.232140260832101e-06, + "loss": 0.5415, "step": 5244 }, { - "epoch": 0.55, - "grad_norm": 3.4858241564055628, - "learning_rate": 4.404084406214358e-06, - "loss": 0.7234, + "epoch": 0.37, + "grad_norm": 1.6004862772342898, + "learning_rate": 7.231111904839599e-06, + "loss": 0.5505, "step": 5245 }, { - "epoch": 0.55, - "grad_norm": 2.3970949139274835, - "learning_rate": 4.402392350002554e-06, - "loss": 0.5235, + "epoch": 0.37, + "grad_norm": 1.6527150745053076, + "learning_rate": 7.230083430992205e-06, + "loss": 0.5624, "step": 5246 }, { - "epoch": 0.55, - "grad_norm": 2.235687197461085, - "learning_rate": 4.400700363219076e-06, - "loss": 0.6731, + "epoch": 0.37, + "grad_norm": 0.8023458865465705, + "learning_rate": 7.2290548393442496e-06, + "loss": 0.4668, "step": 5247 }, { - "epoch": 0.55, - "grad_norm": 8.656344736133104, - "learning_rate": 4.399008446060501e-06, - "loss": 0.6499, + "epoch": 0.37, + "grad_norm": 1.6115487747172093, + "learning_rate": 7.228026129950063e-06, + "loss": 0.5649, "step": 5248 }, { - "epoch": 0.55, - "grad_norm": 3.0639313280738216, - "learning_rate": 4.397316598723385e-06, - "loss": 0.5468, + "epoch": 0.37, + "grad_norm": 1.5072914973469387, + "learning_rate": 7.226997302863987e-06, + "loss": 0.5488, "step": 5249 }, { - "epoch": 0.55, - "grad_norm": 2.0867057122263857, - "learning_rate": 4.3956248214042855e-06, - "loss": 0.6578, + "epoch": 0.37, + "grad_norm": 1.7043892734839095, + "learning_rate": 7.225968358140368e-06, + "loss": 0.5931, "step": 5250 }, { - "epoch": 0.55, - "grad_norm": 2.599040114763554, - "learning_rate": 4.393933114299746e-06, - "loss": 0.6618, + "epoch": 0.37, + "grad_norm": 1.8683975462479476, + "learning_rate": 7.224939295833558e-06, + "loss": 0.6142, "step": 5251 }, { - "epoch": 0.55, - "grad_norm": 2.6001557142240945, - "learning_rate": 4.3922414776063075e-06, - "loss": 0.6799, + "epoch": 0.37, + "grad_norm": 1.5059315638410742, + "learning_rate": 7.223910115997914e-06, + "loss": 0.5338, "step": 5252 }, { - "epoch": 0.55, - "grad_norm": 2.1507305611894223, - "learning_rate": 4.390549911520497e-06, - "loss": 0.663, + "epoch": 0.37, + "grad_norm": 1.4888797602058315, + "learning_rate": 7.222880818687803e-06, + "loss": 0.5707, "step": 5253 }, { - "epoch": 0.55, - "grad_norm": 2.7080296933337946, - "learning_rate": 4.388858416238834e-06, - "loss": 0.5737, + "epoch": 0.37, + "grad_norm": 1.561054412788188, + "learning_rate": 7.221851403957594e-06, + "loss": 0.6096, "step": 5254 }, { - "epoch": 0.55, - "grad_norm": 5.185440108618935, - "learning_rate": 4.3871669919578345e-06, - "loss": 0.6253, + "epoch": 0.37, + "grad_norm": 1.6429889666540278, + "learning_rate": 7.220821871861666e-06, + "loss": 0.5452, "step": 5255 }, { - "epoch": 0.55, - "grad_norm": 2.41165829284016, - "learning_rate": 4.385475638874001e-06, - "loss": 0.6524, + "epoch": 0.37, + "grad_norm": 1.555751235588039, + "learning_rate": 7.219792222454402e-06, + "loss": 0.4979, "step": 5256 }, { - "epoch": 0.55, - "grad_norm": 2.0980213739464113, - "learning_rate": 4.383784357183835e-06, - "loss": 0.5656, + "epoch": 0.37, + "grad_norm": 2.287739592562299, + "learning_rate": 7.21876245579019e-06, + "loss": 0.5943, "step": 5257 }, { - "epoch": 0.55, - "grad_norm": 6.656438895282767, - "learning_rate": 4.382093147083819e-06, - "loss": 0.6449, + "epoch": 0.37, + "grad_norm": 1.4744739900871262, + "learning_rate": 7.217732571923428e-06, + "loss": 0.5059, "step": 5258 }, { - "epoch": 0.55, - "grad_norm": 2.4496335492831403, - "learning_rate": 4.380402008770435e-06, - "loss": 0.6254, + "epoch": 0.37, + "grad_norm": 1.558494791418864, + "learning_rate": 7.216702570908516e-06, + "loss": 0.5249, "step": 5259 }, { - "epoch": 0.55, - "grad_norm": 2.959663295531009, - "learning_rate": 4.378710942440153e-06, - "loss": 0.6069, + "epoch": 0.37, + "grad_norm": 1.7551839196821337, + "learning_rate": 7.215672452799862e-06, + "loss": 0.558, "step": 5260 }, { - "epoch": 0.55, - "grad_norm": 2.7471051867065492, - "learning_rate": 4.377019948289441e-06, - "loss": 0.6661, + "epoch": 0.37, + "grad_norm": 3.114042663375599, + "learning_rate": 7.214642217651883e-06, + "loss": 0.5714, "step": 5261 }, { - "epoch": 0.55, - "grad_norm": 3.8039135221418805, - "learning_rate": 4.375329026514749e-06, - "loss": 0.6398, + "epoch": 0.37, + "grad_norm": 1.6456383582879193, + "learning_rate": 7.213611865518997e-06, + "loss": 0.5921, "step": 5262 }, { - "epoch": 0.55, - "grad_norm": 3.677966696492936, - "learning_rate": 4.373638177312524e-06, - "loss": 0.5647, + "epoch": 0.37, + "grad_norm": 1.5972934299448482, + "learning_rate": 7.212581396455633e-06, + "loss": 0.6443, "step": 5263 }, { - "epoch": 0.55, - "grad_norm": 2.6177835119924455, - "learning_rate": 4.371947400879205e-06, - "loss": 0.6016, + "epoch": 0.37, + "grad_norm": 1.546130372386423, + "learning_rate": 7.2115508105162215e-06, + "loss": 0.5997, "step": 5264 }, { - "epoch": 0.55, - "grad_norm": 2.233055028774205, - "learning_rate": 4.370256697411221e-06, - "loss": 0.5827, + "epoch": 0.37, + "grad_norm": 2.0541408640483616, + "learning_rate": 7.210520107755201e-06, + "loss": 0.5557, "step": 5265 }, { - "epoch": 0.55, - "grad_norm": 2.2362734819525723, - "learning_rate": 4.368566067104998e-06, - "loss": 0.6496, + "epoch": 0.37, + "grad_norm": 1.4076787565227642, + "learning_rate": 7.20948928822702e-06, + "loss": 0.4845, "step": 5266 }, { - "epoch": 0.55, - "grad_norm": 2.211364135754513, - "learning_rate": 4.366875510156939e-06, - "loss": 0.557, + "epoch": 0.37, + "grad_norm": 1.9920923941707174, + "learning_rate": 7.2084583519861275e-06, + "loss": 0.4902, "step": 5267 }, { - "epoch": 0.55, - "grad_norm": 2.8665096255402696, - "learning_rate": 4.365185026763455e-06, - "loss": 0.5965, + "epoch": 0.37, + "grad_norm": 1.8755854174384332, + "learning_rate": 7.207427299086981e-06, + "loss": 0.549, "step": 5268 }, { - "epoch": 0.55, - "grad_norm": 3.0564195544942065, - "learning_rate": 4.363494617120938e-06, - "loss": 0.7145, + "epoch": 0.37, + "grad_norm": 1.6937304562892301, + "learning_rate": 7.2063961295840444e-06, + "loss": 0.5564, "step": 5269 }, { - "epoch": 0.55, - "grad_norm": 2.64369421900083, - "learning_rate": 4.361804281425779e-06, - "loss": 0.725, + "epoch": 0.37, + "grad_norm": 1.6614112783645985, + "learning_rate": 7.20536484353179e-06, + "loss": 0.5986, "step": 5270 }, { - "epoch": 0.55, - "grad_norm": 3.12214434238347, - "learning_rate": 4.360114019874353e-06, - "loss": 0.6916, + "epoch": 0.37, + "grad_norm": 0.8137076783053949, + "learning_rate": 7.20433344098469e-06, + "loss": 0.4631, "step": 5271 }, { - "epoch": 0.55, - "grad_norm": 6.609092742818454, - "learning_rate": 4.35842383266303e-06, - "loss": 0.6325, + "epoch": 0.37, + "grad_norm": 1.402315746956907, + "learning_rate": 7.20330192199723e-06, + "loss": 0.5503, "step": 5272 }, { - "epoch": 0.55, - "grad_norm": 2.300634353062687, - "learning_rate": 4.35673371998817e-06, - "loss": 0.6284, + "epoch": 0.37, + "grad_norm": 1.8730868563169927, + "learning_rate": 7.2022702866238945e-06, + "loss": 0.5921, "step": 5273 }, { - "epoch": 0.55, - "grad_norm": 3.7838481612576356, - "learning_rate": 4.355043682046129e-06, - "loss": 0.6038, + "epoch": 0.37, + "grad_norm": 1.8005092556400688, + "learning_rate": 7.2012385349191815e-06, + "loss": 0.5752, "step": 5274 }, { - "epoch": 0.56, - "grad_norm": 2.7986771832900788, - "learning_rate": 4.353353719033249e-06, - "loss": 0.654, + "epoch": 0.37, + "grad_norm": 2.5364581223426232, + "learning_rate": 7.20020666693759e-06, + "loss": 0.5284, "step": 5275 }, { - "epoch": 0.56, - "grad_norm": 4.87051694324319, - "learning_rate": 4.3516638311458624e-06, - "loss": 0.5146, + "epoch": 0.37, + "grad_norm": 1.6587862283865742, + "learning_rate": 7.199174682733626e-06, + "loss": 0.5422, "step": 5276 }, { - "epoch": 0.56, - "grad_norm": 2.349730181771263, - "learning_rate": 4.349974018580298e-06, - "loss": 0.6656, + "epoch": 0.37, + "grad_norm": 1.8328213600877143, + "learning_rate": 7.1981425823618045e-06, + "loss": 0.4888, "step": 5277 }, { - "epoch": 0.56, - "grad_norm": 2.335395790852798, - "learning_rate": 4.348284281532874e-06, - "loss": 0.5557, + "epoch": 0.37, + "grad_norm": 1.629982461436803, + "learning_rate": 7.197110365876644e-06, + "loss": 0.4827, "step": 5278 }, { - "epoch": 0.56, - "grad_norm": 2.7325757799709605, - "learning_rate": 4.3465946201999e-06, - "loss": 0.7216, + "epoch": 0.37, + "grad_norm": 1.4092732155786576, + "learning_rate": 7.1960780333326695e-06, + "loss": 0.4829, "step": 5279 }, { - "epoch": 0.56, - "grad_norm": 2.104588371726466, - "learning_rate": 4.344905034777672e-06, - "loss": 0.6013, + "epoch": 0.37, + "grad_norm": 1.6696243747739277, + "learning_rate": 7.19504558478441e-06, + "loss": 0.4528, "step": 5280 }, { - "epoch": 0.56, - "grad_norm": 2.3109494580443304, - "learning_rate": 4.343215525462484e-06, - "loss": 0.6356, + "epoch": 0.37, + "grad_norm": 1.4870829947661377, + "learning_rate": 7.194013020286407e-06, + "loss": 0.5127, "step": 5281 }, { - "epoch": 0.56, - "grad_norm": 3.017653085191442, - "learning_rate": 4.3415260924506165e-06, - "loss": 0.6454, + "epoch": 0.37, + "grad_norm": 0.7897434437984284, + "learning_rate": 7.192980339893201e-06, + "loss": 0.4496, "step": 5282 }, { - "epoch": 0.56, - "grad_norm": 2.571020375717761, - "learning_rate": 4.339836735938347e-06, - "loss": 0.5784, + "epoch": 0.37, + "grad_norm": 2.417471151957371, + "learning_rate": 7.191947543659341e-06, + "loss": 0.5393, "step": 5283 }, { - "epoch": 0.56, - "grad_norm": 2.4571187576232276, - "learning_rate": 4.338147456121935e-06, - "loss": 0.6483, + "epoch": 0.37, + "grad_norm": 1.9497259971559688, + "learning_rate": 7.190914631639385e-06, + "loss": 0.5371, "step": 5284 }, { - "epoch": 0.56, - "grad_norm": 2.5644236337830937, - "learning_rate": 4.336458253197637e-06, - "loss": 0.5715, + "epoch": 0.38, + "grad_norm": 1.6395187052572293, + "learning_rate": 7.189881603887897e-06, + "loss": 0.5278, "step": 5285 }, { - "epoch": 0.56, - "grad_norm": 2.511149918383041, - "learning_rate": 4.334769127361703e-06, - "loss": 0.6012, + "epoch": 0.38, + "grad_norm": 1.9199606064617991, + "learning_rate": 7.18884846045944e-06, + "loss": 0.5675, "step": 5286 }, { - "epoch": 0.56, - "grad_norm": 4.883950372753124, - "learning_rate": 4.333080078810369e-06, - "loss": 0.6156, + "epoch": 0.38, + "grad_norm": 1.6567757570098334, + "learning_rate": 7.187815201408592e-06, + "loss": 0.5283, "step": 5287 }, { - "epoch": 0.56, - "grad_norm": 3.4555634315635984, - "learning_rate": 4.331391107739864e-06, - "loss": 0.6276, + "epoch": 0.38, + "grad_norm": 0.68564378957445, + "learning_rate": 7.1867818267899305e-06, + "loss": 0.4231, "step": 5288 }, { - "epoch": 0.56, - "grad_norm": 2.8118042237460923, - "learning_rate": 4.329702214346406e-06, - "loss": 0.6439, + "epoch": 0.38, + "grad_norm": 1.755078972203877, + "learning_rate": 7.185748336658043e-06, + "loss": 0.5283, "step": 5289 }, { - "epoch": 0.56, - "grad_norm": 2.141667067861921, - "learning_rate": 4.328013398826207e-06, - "loss": 0.6292, + "epoch": 0.38, + "grad_norm": 1.632939537354433, + "learning_rate": 7.184714731067521e-06, + "loss": 0.5328, "step": 5290 }, { - "epoch": 0.56, - "grad_norm": 2.1279882409294806, - "learning_rate": 4.3263246613754685e-06, - "loss": 0.6465, + "epoch": 0.38, + "grad_norm": 1.6433772723460291, + "learning_rate": 7.183681010072966e-06, + "loss": 0.5773, "step": 5291 }, { - "epoch": 0.56, - "grad_norm": 5.1797440641409, - "learning_rate": 4.324636002190386e-06, - "loss": 0.6019, + "epoch": 0.38, + "grad_norm": 1.5534244546967315, + "learning_rate": 7.182647173728981e-06, + "loss": 0.5309, "step": 5292 }, { - "epoch": 0.56, - "grad_norm": 2.5518905433521217, - "learning_rate": 4.322947421467138e-06, - "loss": 0.6755, + "epoch": 0.38, + "grad_norm": 2.0750139843265685, + "learning_rate": 7.181613222090175e-06, + "loss": 0.6019, "step": 5293 }, { - "epoch": 0.56, - "grad_norm": 4.707501918054458, - "learning_rate": 4.321258919401903e-06, - "loss": 0.7213, + "epoch": 0.38, + "grad_norm": 1.6006060937009494, + "learning_rate": 7.180579155211167e-06, + "loss": 0.5778, "step": 5294 }, { - "epoch": 0.56, - "grad_norm": 3.090219508336725, - "learning_rate": 4.319570496190843e-06, - "loss": 0.5684, + "epoch": 0.38, + "grad_norm": 1.6890174708372767, + "learning_rate": 7.179544973146579e-06, + "loss": 0.5605, "step": 5295 }, { - "epoch": 0.56, - "grad_norm": 2.5586775104390838, - "learning_rate": 4.317882152030118e-06, - "loss": 0.6409, + "epoch": 0.38, + "grad_norm": 1.617631553486146, + "learning_rate": 7.178510675951042e-06, + "loss": 0.5476, "step": 5296 }, { - "epoch": 0.56, - "grad_norm": 2.7425290806450615, - "learning_rate": 4.316193887115871e-06, - "loss": 0.6583, + "epoch": 0.38, + "grad_norm": 1.896021418610378, + "learning_rate": 7.177476263679186e-06, + "loss": 0.5737, "step": 5297 }, { - "epoch": 0.56, - "grad_norm": 2.929685634618751, - "learning_rate": 4.314505701644242e-06, - "loss": 0.6087, + "epoch": 0.38, + "grad_norm": 1.7065049082230734, + "learning_rate": 7.176441736385658e-06, + "loss": 0.5002, "step": 5298 }, { - "epoch": 0.56, - "grad_norm": 1.037591260391563, - "learning_rate": 4.3128175958113585e-06, - "loss": 0.5755, + "epoch": 0.38, + "grad_norm": 1.4385463718118985, + "learning_rate": 7.1754070941251005e-06, + "loss": 0.5485, "step": 5299 }, { - "epoch": 0.56, - "grad_norm": 2.459322238903088, - "learning_rate": 4.311129569813341e-06, - "loss": 0.6523, + "epoch": 0.38, + "grad_norm": 1.5533163986558027, + "learning_rate": 7.174372336952171e-06, + "loss": 0.5296, "step": 5300 }, { - "epoch": 0.56, - "grad_norm": 3.3425180981358253, - "learning_rate": 4.3094416238463e-06, - "loss": 0.6665, + "epoch": 0.38, + "grad_norm": 2.077080300784717, + "learning_rate": 7.173337464921526e-06, + "loss": 0.4963, "step": 5301 }, { - "epoch": 0.56, - "grad_norm": 2.2917138959022267, - "learning_rate": 4.307753758106332e-06, - "loss": 0.6679, + "epoch": 0.38, + "grad_norm": 1.627443970665686, + "learning_rate": 7.172302478087833e-06, + "loss": 0.5387, "step": 5302 }, { - "epoch": 0.56, - "grad_norm": 1.9682044085217711, - "learning_rate": 4.306065972789533e-06, - "loss": 0.6181, + "epoch": 0.38, + "grad_norm": 2.2574212581540856, + "learning_rate": 7.171267376505761e-06, + "loss": 0.5403, "step": 5303 }, { - "epoch": 0.56, - "grad_norm": 2.339833756953796, - "learning_rate": 4.304378268091982e-06, - "loss": 0.6613, + "epoch": 0.38, + "grad_norm": 1.4580101904782823, + "learning_rate": 7.170232160229988e-06, + "loss": 0.4788, "step": 5304 }, { - "epoch": 0.56, - "grad_norm": 3.445630959329979, - "learning_rate": 4.302690644209756e-06, - "loss": 0.6045, + "epoch": 0.38, + "grad_norm": 2.7849583802248183, + "learning_rate": 7.169196829315201e-06, + "loss": 0.5671, "step": 5305 }, { - "epoch": 0.56, - "grad_norm": 2.5016790643491187, - "learning_rate": 4.301003101338913e-06, - "loss": 0.5814, + "epoch": 0.38, + "grad_norm": 2.2024137922400366, + "learning_rate": 7.168161383816085e-06, + "loss": 0.5397, "step": 5306 }, { - "epoch": 0.56, - "grad_norm": 5.6755679527300735, - "learning_rate": 4.29931563967551e-06, - "loss": 0.6186, + "epoch": 0.38, + "grad_norm": 1.6958604140560845, + "learning_rate": 7.16712582378734e-06, + "loss": 0.5657, "step": 5307 }, { - "epoch": 0.56, - "grad_norm": 2.4930015287099927, - "learning_rate": 4.2976282594155885e-06, - "loss": 0.6656, + "epoch": 0.38, + "grad_norm": 1.523672667845232, + "learning_rate": 7.166090149283664e-06, + "loss": 0.4692, "step": 5308 }, { - "epoch": 0.56, - "grad_norm": 2.3871048273612114, - "learning_rate": 4.2959409607551885e-06, - "loss": 0.6458, + "epoch": 0.38, + "grad_norm": 1.575102442172051, + "learning_rate": 7.165054360359769e-06, + "loss": 0.581, "step": 5309 }, { - "epoch": 0.56, - "grad_norm": 2.6870418365333726, - "learning_rate": 4.294253743890331e-06, - "loss": 0.5847, + "epoch": 0.38, + "grad_norm": 2.104394982839106, + "learning_rate": 7.164018457070364e-06, + "loss": 0.5768, "step": 5310 }, { - "epoch": 0.56, - "grad_norm": 2.453388094507715, - "learning_rate": 4.292566609017032e-06, - "loss": 0.5881, + "epoch": 0.38, + "grad_norm": 1.4272713097407983, + "learning_rate": 7.162982439470172e-06, + "loss": 0.5313, "step": 5311 }, { - "epoch": 0.56, - "grad_norm": 3.1874846967139456, - "learning_rate": 4.290879556331301e-06, - "loss": 0.5902, + "epoch": 0.38, + "grad_norm": 1.5145696630465368, + "learning_rate": 7.161946307613918e-06, + "loss": 0.537, "step": 5312 }, { - "epoch": 0.56, - "grad_norm": 5.5444861113536374, - "learning_rate": 4.2891925860291315e-06, - "loss": 0.4916, + "epoch": 0.38, + "grad_norm": 1.6311822549636168, + "learning_rate": 7.160910061556337e-06, + "loss": 0.5235, "step": 5313 }, { - "epoch": 0.56, - "grad_norm": 2.2307192839807906, - "learning_rate": 4.287505698306517e-06, - "loss": 0.6353, + "epoch": 0.38, + "grad_norm": 1.5648243822087662, + "learning_rate": 7.15987370135216e-06, + "loss": 0.5182, "step": 5314 }, { - "epoch": 0.56, - "grad_norm": 2.316081716986372, - "learning_rate": 4.285818893359427e-06, - "loss": 0.6589, + "epoch": 0.38, + "grad_norm": 2.038558610786504, + "learning_rate": 7.158837227056139e-06, + "loss": 0.527, "step": 5315 }, { - "epoch": 0.56, - "grad_norm": 2.455941628341801, - "learning_rate": 4.284132171383834e-06, - "loss": 0.6596, + "epoch": 0.38, + "grad_norm": 1.6603971358090268, + "learning_rate": 7.157800638723019e-06, + "loss": 0.5676, "step": 5316 }, { - "epoch": 0.56, - "grad_norm": 3.220993513664449, - "learning_rate": 4.2824455325756955e-06, - "loss": 0.6346, + "epoch": 0.38, + "grad_norm": 1.7626033177193374, + "learning_rate": 7.1567639364075595e-06, + "loss": 0.5534, "step": 5317 }, { - "epoch": 0.56, - "grad_norm": 2.19089056244659, - "learning_rate": 4.2807589771309635e-06, - "loss": 0.5772, + "epoch": 0.38, + "grad_norm": 2.253463666463328, + "learning_rate": 7.1557271201645184e-06, + "loss": 0.5003, "step": 5318 }, { - "epoch": 0.56, - "grad_norm": 2.731529565019871, - "learning_rate": 4.2790725052455726e-06, - "loss": 0.6435, + "epoch": 0.38, + "grad_norm": 1.8631878423772479, + "learning_rate": 7.154690190048667e-06, + "loss": 0.6196, "step": 5319 }, { - "epoch": 0.56, - "grad_norm": 3.5785634546615195, - "learning_rate": 4.2773861171154525e-06, - "loss": 0.7559, + "epoch": 0.38, + "grad_norm": 1.896871517425509, + "learning_rate": 7.153653146114779e-06, + "loss": 0.5899, "step": 5320 }, { - "epoch": 0.56, - "grad_norm": 3.480519667349258, - "learning_rate": 4.275699812936526e-06, - "loss": 0.6401, + "epoch": 0.38, + "grad_norm": 1.6096923029626529, + "learning_rate": 7.152615988417634e-06, + "loss": 0.5609, "step": 5321 }, { - "epoch": 0.56, - "grad_norm": 2.4318321347360388, - "learning_rate": 4.2740135929047034e-06, - "loss": 0.6463, + "epoch": 0.38, + "grad_norm": 1.4528029308446935, + "learning_rate": 7.151578717012018e-06, + "loss": 0.5338, "step": 5322 }, { - "epoch": 0.56, - "grad_norm": 2.5507047618208354, - "learning_rate": 4.2723274572158805e-06, - "loss": 0.6721, + "epoch": 0.38, + "grad_norm": 1.776661978842978, + "learning_rate": 7.150541331952724e-06, + "loss": 0.5296, "step": 5323 }, { - "epoch": 0.56, - "grad_norm": 2.4756784180683753, - "learning_rate": 4.27064140606595e-06, - "loss": 0.6507, + "epoch": 0.38, + "grad_norm": 1.805414344253933, + "learning_rate": 7.1495038332945495e-06, + "loss": 0.5296, "step": 5324 }, { - "epoch": 0.56, - "grad_norm": 2.5608707563373527, - "learning_rate": 4.268955439650793e-06, - "loss": 0.5792, + "epoch": 0.38, + "grad_norm": 0.8610570337158563, + "learning_rate": 7.148466221092299e-06, + "loss": 0.4437, "step": 5325 }, { - "epoch": 0.56, - "grad_norm": 2.551538680904513, - "learning_rate": 4.267269558166279e-06, - "loss": 0.6255, + "epoch": 0.38, + "grad_norm": 1.8706284318661104, + "learning_rate": 7.147428495400783e-06, + "loss": 0.5656, "step": 5326 }, { - "epoch": 0.56, - "grad_norm": 2.2584831084156374, - "learning_rate": 4.265583761808272e-06, - "loss": 0.7075, + "epoch": 0.38, + "grad_norm": 1.5388671636992204, + "learning_rate": 7.146390656274817e-06, + "loss": 0.5722, "step": 5327 }, { - "epoch": 0.56, - "grad_norm": 2.4024189459356524, - "learning_rate": 4.26389805077262e-06, - "loss": 0.6714, + "epoch": 0.38, + "grad_norm": 1.8166473295465588, + "learning_rate": 7.145352703769225e-06, + "loss": 0.5237, "step": 5328 }, { - "epoch": 0.56, - "grad_norm": 2.5974293268410182, - "learning_rate": 4.262212425255164e-06, - "loss": 0.6049, + "epoch": 0.38, + "grad_norm": 1.896273333173229, + "learning_rate": 7.144314637938833e-06, + "loss": 0.6275, "step": 5329 }, { - "epoch": 0.56, - "grad_norm": 2.222020544696383, - "learning_rate": 4.260526885451734e-06, - "loss": 0.6188, + "epoch": 0.38, + "grad_norm": 1.5849504476593916, + "learning_rate": 7.143276458838476e-06, + "loss": 0.5867, "step": 5330 }, { - "epoch": 0.56, - "grad_norm": 2.637638372678464, - "learning_rate": 4.258841431558156e-06, - "loss": 0.6554, + "epoch": 0.38, + "grad_norm": 1.711993399603033, + "learning_rate": 7.1422381665229935e-06, + "loss": 0.5385, "step": 5331 }, { - "epoch": 0.56, - "grad_norm": 2.0803098388886165, - "learning_rate": 4.257156063770237e-06, - "loss": 0.6647, + "epoch": 0.38, + "grad_norm": 1.4824626341744926, + "learning_rate": 7.141199761047233e-06, + "loss": 0.5182, "step": 5332 }, { - "epoch": 0.56, - "grad_norm": 2.038548102890104, - "learning_rate": 4.2554707822837775e-06, - "loss": 0.576, + "epoch": 0.38, + "grad_norm": 2.228913694024556, + "learning_rate": 7.140161242466045e-06, + "loss": 0.5457, "step": 5333 }, { - "epoch": 0.56, - "grad_norm": 2.4381809992111414, - "learning_rate": 4.253785587294571e-06, - "loss": 0.6267, + "epoch": 0.38, + "grad_norm": 1.6002694766569634, + "learning_rate": 7.139122610834288e-06, + "loss": 0.5713, "step": 5334 }, { - "epoch": 0.56, - "grad_norm": 2.9095370655137764, - "learning_rate": 4.252100478998398e-06, - "loss": 0.5394, + "epoch": 0.38, + "grad_norm": 1.627555111726412, + "learning_rate": 7.138083866206826e-06, + "loss": 0.5696, "step": 5335 }, { - "epoch": 0.56, - "grad_norm": 2.208800982681307, - "learning_rate": 4.250415457591031e-06, - "loss": 0.5668, + "epoch": 0.38, + "grad_norm": 1.5352627976621351, + "learning_rate": 7.137045008638531e-06, + "loss": 0.5302, "step": 5336 }, { - "epoch": 0.56, - "grad_norm": 2.237278462901377, - "learning_rate": 4.248730523268227e-06, - "loss": 0.554, + "epoch": 0.38, + "grad_norm": 1.7056950348548432, + "learning_rate": 7.136006038184276e-06, + "loss": 0.5493, "step": 5337 }, { - "epoch": 0.56, - "grad_norm": 5.159594855549469, - "learning_rate": 4.24704567622574e-06, - "loss": 0.5775, + "epoch": 0.38, + "grad_norm": 1.6514193256884193, + "learning_rate": 7.1349669548989455e-06, + "loss": 0.6088, "step": 5338 }, { - "epoch": 0.56, - "grad_norm": 2.0495262092053586, - "learning_rate": 4.245360916659309e-06, - "loss": 0.6269, + "epoch": 0.38, + "grad_norm": 1.471518974111202, + "learning_rate": 7.133927758837426e-06, + "loss": 0.5069, "step": 5339 }, { - "epoch": 0.56, - "grad_norm": 2.7328601805196513, - "learning_rate": 4.243676244764667e-06, - "loss": 0.5666, + "epoch": 0.38, + "grad_norm": 1.9341273072838945, + "learning_rate": 7.13288845005461e-06, + "loss": 0.6304, "step": 5340 }, { - "epoch": 0.56, - "grad_norm": 2.599232977470765, - "learning_rate": 4.241991660737532e-06, - "loss": 0.5742, + "epoch": 0.38, + "grad_norm": 1.753497751662623, + "learning_rate": 7.1318490286053995e-06, + "loss": 0.5613, "step": 5341 }, { - "epoch": 0.56, - "grad_norm": 2.6689500818787217, - "learning_rate": 4.240307164773615e-06, - "loss": 0.6964, + "epoch": 0.38, + "grad_norm": 1.325593884994315, + "learning_rate": 7.130809494544699e-06, + "loss": 0.4739, "step": 5342 }, { - "epoch": 0.56, - "grad_norm": 2.0051800373166295, - "learning_rate": 4.238622757068614e-06, - "loss": 0.6334, + "epoch": 0.38, + "grad_norm": 3.0627626394898004, + "learning_rate": 7.1297698479274235e-06, + "loss": 0.6236, "step": 5343 }, { - "epoch": 0.56, - "grad_norm": 3.296191095173538, - "learning_rate": 4.2369384378182216e-06, - "loss": 0.5819, + "epoch": 0.38, + "grad_norm": 1.8143092595692016, + "learning_rate": 7.128730088808486e-06, + "loss": 0.6166, "step": 5344 }, { - "epoch": 0.56, - "grad_norm": 3.7526757822737973, - "learning_rate": 4.2352542072181156e-06, - "loss": 0.5592, + "epoch": 0.38, + "grad_norm": 1.6107176079528123, + "learning_rate": 7.127690217242813e-06, + "loss": 0.5647, "step": 5345 }, { - "epoch": 0.56, - "grad_norm": 4.769568113242851, - "learning_rate": 4.233570065463964e-06, - "loss": 0.6135, + "epoch": 0.38, + "grad_norm": 1.6231040606478415, + "learning_rate": 7.1266502332853325e-06, + "loss": 0.568, "step": 5346 }, { - "epoch": 0.56, - "grad_norm": 2.2793506199943447, - "learning_rate": 4.231886012751427e-06, - "loss": 0.5955, + "epoch": 0.38, + "grad_norm": 1.5726459329411413, + "learning_rate": 7.1256101369909815e-06, + "loss": 0.4863, "step": 5347 }, { - "epoch": 0.56, - "grad_norm": 2.4668991593558562, - "learning_rate": 4.230202049276152e-06, - "loss": 0.6746, + "epoch": 0.38, + "grad_norm": 3.953381203546077, + "learning_rate": 7.1245699284146986e-06, + "loss": 0.5144, "step": 5348 }, { - "epoch": 0.56, - "grad_norm": 2.273428983253158, - "learning_rate": 4.228518175233781e-06, - "loss": 0.6367, + "epoch": 0.38, + "grad_norm": 1.6071115307474362, + "learning_rate": 7.123529607611433e-06, + "loss": 0.4963, "step": 5349 }, { - "epoch": 0.56, - "grad_norm": 3.012893755691541, - "learning_rate": 4.226834390819935e-06, - "loss": 0.6371, + "epoch": 0.38, + "grad_norm": 1.5946157149527314, + "learning_rate": 7.12248917463614e-06, + "loss": 0.57, "step": 5350 }, { - "epoch": 0.56, - "grad_norm": 2.892193534980026, - "learning_rate": 4.225150696230236e-06, - "loss": 0.6217, + "epoch": 0.38, + "grad_norm": 1.3796933425491658, + "learning_rate": 7.121448629543777e-06, + "loss": 0.4516, "step": 5351 }, { - "epoch": 0.56, - "grad_norm": 2.4872260622117532, - "learning_rate": 4.223467091660287e-06, - "loss": 0.6131, + "epoch": 0.38, + "grad_norm": 1.5265133574894296, + "learning_rate": 7.1204079723893075e-06, + "loss": 0.5637, "step": 5352 }, { - "epoch": 0.56, - "grad_norm": 2.493780184626296, - "learning_rate": 4.22178357730569e-06, - "loss": 0.6648, + "epoch": 0.38, + "grad_norm": 1.730342876531837, + "learning_rate": 7.119367203227705e-06, + "loss": 0.5832, "step": 5353 }, { - "epoch": 0.56, - "grad_norm": 2.1706205845561226, - "learning_rate": 4.220100153362026e-06, - "loss": 0.6315, + "epoch": 0.38, + "grad_norm": 2.4765157149336408, + "learning_rate": 7.118326322113944e-06, + "loss": 0.483, "step": 5354 }, { - "epoch": 0.56, - "grad_norm": 2.2135535393045607, - "learning_rate": 4.2184168200248695e-06, - "loss": 0.6127, + "epoch": 0.38, + "grad_norm": 1.3610711703906517, + "learning_rate": 7.1172853291030094e-06, + "loss": 0.5203, "step": 5355 }, { - "epoch": 0.56, - "grad_norm": 3.6077079918211687, - "learning_rate": 4.21673357748979e-06, - "loss": 0.5827, + "epoch": 0.38, + "grad_norm": 1.3594934468477995, + "learning_rate": 7.116244224249888e-06, + "loss": 0.4989, "step": 5356 }, { - "epoch": 0.56, - "grad_norm": 2.2105823742172683, - "learning_rate": 4.215050425952339e-06, - "loss": 0.6933, + "epoch": 0.38, + "grad_norm": 1.5634049904091782, + "learning_rate": 7.115203007609578e-06, + "loss": 0.5895, "step": 5357 }, { - "epoch": 0.56, - "grad_norm": 2.2055870618699362, - "learning_rate": 4.213367365608061e-06, - "loss": 0.638, + "epoch": 0.38, + "grad_norm": 1.574872168860554, + "learning_rate": 7.114161679237079e-06, + "loss": 0.5778, "step": 5358 }, { - "epoch": 0.56, - "grad_norm": 2.588655796581996, - "learning_rate": 4.211684396652487e-06, - "loss": 0.6038, + "epoch": 0.38, + "grad_norm": 1.5257506963426022, + "learning_rate": 7.113120239187393e-06, + "loss": 0.5864, "step": 5359 }, { - "epoch": 0.56, - "grad_norm": 2.0448283559826437, - "learning_rate": 4.210001519281142e-06, - "loss": 0.7124, + "epoch": 0.38, + "grad_norm": 1.6948834064708003, + "learning_rate": 7.112078687515538e-06, + "loss": 0.6052, "step": 5360 }, { - "epoch": 0.56, - "grad_norm": 2.062696584888601, - "learning_rate": 4.208318733689535e-06, - "loss": 0.626, + "epoch": 0.38, + "grad_norm": 1.456109205124301, + "learning_rate": 7.111037024276529e-06, + "loss": 0.5135, "step": 5361 }, { - "epoch": 0.56, - "grad_norm": 2.604045649910468, - "learning_rate": 4.206636040073172e-06, - "loss": 0.6666, + "epoch": 0.38, + "grad_norm": 1.5374290697537834, + "learning_rate": 7.109995249525392e-06, + "loss": 0.5601, "step": 5362 }, { - "epoch": 0.56, - "grad_norm": 2.508672230990463, - "learning_rate": 4.204953438627539e-06, - "loss": 0.6892, + "epoch": 0.38, + "grad_norm": 1.4839760360499616, + "learning_rate": 7.108953363317155e-06, + "loss": 0.576, "step": 5363 }, { - "epoch": 0.56, - "grad_norm": 4.199729614432598, - "learning_rate": 4.203270929548117e-06, - "loss": 0.6088, + "epoch": 0.38, + "grad_norm": 0.7187490375057405, + "learning_rate": 7.107911365706856e-06, + "loss": 0.4648, "step": 5364 }, { - "epoch": 0.56, - "grad_norm": 2.315338636083697, - "learning_rate": 4.2015885130303745e-06, - "loss": 0.5451, + "epoch": 0.38, + "grad_norm": 1.6902380234377923, + "learning_rate": 7.106869256749537e-06, + "loss": 0.5627, "step": 5365 }, { - "epoch": 0.56, - "grad_norm": 2.8563637400295274, - "learning_rate": 4.199906189269773e-06, - "loss": 0.6707, + "epoch": 0.38, + "grad_norm": 1.4739323270343563, + "learning_rate": 7.105827036500245e-06, + "loss": 0.5366, "step": 5366 }, { - "epoch": 0.56, - "grad_norm": 3.3092435696741203, - "learning_rate": 4.198223958461756e-06, - "loss": 0.5999, + "epoch": 0.38, + "grad_norm": 0.7250956712873298, + "learning_rate": 7.104784705014032e-06, + "loss": 0.419, "step": 5367 }, { - "epoch": 0.56, - "grad_norm": 2.284714130272184, - "learning_rate": 4.196541820801761e-06, - "loss": 0.5714, + "epoch": 0.38, + "grad_norm": 1.6454346185583595, + "learning_rate": 7.103742262345959e-06, + "loss": 0.5455, "step": 5368 }, { - "epoch": 0.56, - "grad_norm": 2.208365754435486, - "learning_rate": 4.194859776485216e-06, - "loss": 0.6477, + "epoch": 0.38, + "grad_norm": 1.816008481598353, + "learning_rate": 7.1026997085510915e-06, + "loss": 0.5415, "step": 5369 }, { - "epoch": 0.57, - "grad_norm": 2.9161689800766633, - "learning_rate": 4.193177825707535e-06, - "loss": 0.605, + "epoch": 0.38, + "grad_norm": 0.8822938169963456, + "learning_rate": 7.1016570436845e-06, + "loss": 0.4744, "step": 5370 }, { - "epoch": 0.57, - "grad_norm": 2.5071516041978534, - "learning_rate": 4.191495968664122e-06, - "loss": 0.6405, + "epoch": 0.38, + "grad_norm": 1.463388817348178, + "learning_rate": 7.100614267801262e-06, + "loss": 0.5002, "step": 5371 }, { - "epoch": 0.57, - "grad_norm": 2.374183355215098, - "learning_rate": 4.189814205550369e-06, - "loss": 0.6325, + "epoch": 0.38, + "grad_norm": 1.7394591674015685, + "learning_rate": 7.099571380956462e-06, + "loss": 0.5553, "step": 5372 }, { - "epoch": 0.57, - "grad_norm": 2.9716578098089905, - "learning_rate": 4.18813253656166e-06, - "loss": 0.5657, + "epoch": 0.38, + "grad_norm": 0.7427113286051817, + "learning_rate": 7.098528383205186e-06, + "loss": 0.4568, "step": 5373 }, { - "epoch": 0.57, - "grad_norm": 2.5849556565957026, - "learning_rate": 4.186450961893366e-06, - "loss": 0.5766, + "epoch": 0.38, + "grad_norm": 1.7524652848972893, + "learning_rate": 7.097485274602531e-06, + "loss": 0.5567, "step": 5374 }, { - "epoch": 0.57, - "grad_norm": 2.6139215621325977, - "learning_rate": 4.184769481740848e-06, - "loss": 0.5995, + "epoch": 0.38, + "grad_norm": 1.7316665761664194, + "learning_rate": 7.096442055203596e-06, + "loss": 0.4785, "step": 5375 }, { - "epoch": 0.57, - "grad_norm": 2.035112537909609, - "learning_rate": 4.183088096299455e-06, - "loss": 0.6741, + "epoch": 0.38, + "grad_norm": 2.1415361911303497, + "learning_rate": 7.095398725063488e-06, + "loss": 0.6195, "step": 5376 }, { - "epoch": 0.57, - "grad_norm": 4.3310970193321285, - "learning_rate": 4.1814068057645255e-06, - "loss": 0.6492, + "epoch": 0.38, + "grad_norm": 1.73985059540259, + "learning_rate": 7.0943552842373185e-06, + "loss": 0.4716, "step": 5377 }, { - "epoch": 0.57, - "grad_norm": 2.581325181469474, - "learning_rate": 4.179725610331385e-06, - "loss": 0.5451, + "epoch": 0.38, + "grad_norm": 1.8190014522796827, + "learning_rate": 7.093311732780207e-06, + "loss": 0.626, "step": 5378 }, { - "epoch": 0.57, - "grad_norm": 4.498160977405595, - "learning_rate": 4.178044510195356e-06, - "loss": 0.657, + "epoch": 0.38, + "grad_norm": 0.8100662483649863, + "learning_rate": 7.0922680707472765e-06, + "loss": 0.4773, "step": 5379 }, { - "epoch": 0.57, - "grad_norm": 2.631101141420637, - "learning_rate": 4.176363505551737e-06, - "loss": 0.7063, + "epoch": 0.38, + "grad_norm": 2.3531442652631513, + "learning_rate": 7.091224298193658e-06, + "loss": 0.5972, "step": 5380 }, { - "epoch": 0.57, - "grad_norm": 2.528098787527815, - "learning_rate": 4.174682596595824e-06, - "loss": 0.6972, + "epoch": 0.38, + "grad_norm": 1.6646633879445845, + "learning_rate": 7.090180415174485e-06, + "loss": 0.5804, "step": 5381 }, { - "epoch": 0.57, - "grad_norm": 2.3803147400781186, - "learning_rate": 4.173001783522903e-06, - "loss": 0.6797, + "epoch": 0.38, + "grad_norm": 1.7022106885075796, + "learning_rate": 7.0891364217449e-06, + "loss": 0.5905, "step": 5382 }, { - "epoch": 0.57, - "grad_norm": 2.483697902714156, - "learning_rate": 4.171321066528243e-06, - "loss": 0.659, + "epoch": 0.38, + "grad_norm": 1.6642102763721145, + "learning_rate": 7.08809231796005e-06, + "loss": 0.5196, "step": 5383 }, { - "epoch": 0.57, - "grad_norm": 3.1064216692361284, - "learning_rate": 4.16964044580711e-06, - "loss": 0.6183, + "epoch": 0.38, + "grad_norm": 1.6949094772710465, + "learning_rate": 7.087048103875088e-06, + "loss": 0.5848, "step": 5384 }, { - "epoch": 0.57, - "grad_norm": 2.750461435944443, - "learning_rate": 4.167959921554745e-06, - "loss": 0.6222, + "epoch": 0.38, + "grad_norm": 2.607935193900281, + "learning_rate": 7.086003779545174e-06, + "loss": 0.5532, "step": 5385 }, { - "epoch": 0.57, - "grad_norm": 2.0857966955698677, - "learning_rate": 4.166279493966393e-06, - "loss": 0.6282, + "epoch": 0.38, + "grad_norm": 1.4794609341247655, + "learning_rate": 7.084959345025471e-06, + "loss": 0.5438, "step": 5386 }, { - "epoch": 0.57, - "grad_norm": 3.2748873110006333, - "learning_rate": 4.164599163237277e-06, - "loss": 0.627, + "epoch": 0.38, + "grad_norm": 1.5502661073314832, + "learning_rate": 7.0839148003711524e-06, + "loss": 0.5453, "step": 5387 }, { - "epoch": 0.57, - "grad_norm": 2.7282550442214744, - "learning_rate": 4.1629189295626195e-06, - "loss": 0.6795, + "epoch": 0.38, + "grad_norm": 1.8351242533690564, + "learning_rate": 7.0828701456373914e-06, + "loss": 0.6032, "step": 5388 }, { - "epoch": 0.57, - "grad_norm": 4.174622024247829, - "learning_rate": 4.161238793137619e-06, - "loss": 0.5899, + "epoch": 0.38, + "grad_norm": 2.0490818799866295, + "learning_rate": 7.0818253808793725e-06, + "loss": 0.5657, "step": 5389 }, { - "epoch": 0.57, - "grad_norm": 2.068250197507869, - "learning_rate": 4.159558754157469e-06, - "loss": 0.5851, + "epoch": 0.38, + "grad_norm": 2.05914619556099, + "learning_rate": 7.080780506152282e-06, + "loss": 0.5435, "step": 5390 }, { - "epoch": 0.57, - "grad_norm": 2.1242958834633563, - "learning_rate": 4.157878812817356e-06, - "loss": 0.6659, + "epoch": 0.38, + "grad_norm": 1.515721749750618, + "learning_rate": 7.079735521511315e-06, + "loss": 0.6052, "step": 5391 }, { - "epoch": 0.57, - "grad_norm": 2.7032218490370274, - "learning_rate": 4.1561989693124485e-06, - "loss": 0.6705, + "epoch": 0.38, + "grad_norm": 1.8678028849102208, + "learning_rate": 7.078690427011669e-06, + "loss": 0.5597, "step": 5392 }, { - "epoch": 0.57, - "grad_norm": 2.753703066203034, - "learning_rate": 4.154519223837904e-06, - "loss": 0.6477, + "epoch": 0.38, + "grad_norm": 1.52731061484615, + "learning_rate": 7.077645222708554e-06, + "loss": 0.5816, "step": 5393 }, { - "epoch": 0.57, - "grad_norm": 2.5189720424400934, - "learning_rate": 4.1528395765888716e-06, - "loss": 0.6326, + "epoch": 0.38, + "grad_norm": 0.750972394230337, + "learning_rate": 7.076599908657177e-06, + "loss": 0.4441, "step": 5394 }, { - "epoch": 0.57, - "grad_norm": 2.1872662432303587, - "learning_rate": 4.15116002776049e-06, - "loss": 0.6172, + "epoch": 0.38, + "grad_norm": 1.4581909869165495, + "learning_rate": 7.075554484912755e-06, + "loss": 0.4987, "step": 5395 }, { - "epoch": 0.57, - "grad_norm": 2.8914596193035806, - "learning_rate": 4.14948057754788e-06, - "loss": 0.6174, + "epoch": 0.38, + "grad_norm": 1.8442822516496875, + "learning_rate": 7.074508951530514e-06, + "loss": 0.5295, "step": 5396 }, { - "epoch": 0.57, - "grad_norm": 4.0372536675287805, - "learning_rate": 4.147801226146163e-06, - "loss": 0.5389, + "epoch": 0.38, + "grad_norm": 1.560069065837114, + "learning_rate": 7.07346330856568e-06, + "loss": 0.5836, "step": 5397 }, { - "epoch": 0.57, - "grad_norm": 3.7867058905780673, - "learning_rate": 4.146121973750431e-06, - "loss": 0.6527, + "epoch": 0.38, + "grad_norm": 2.1791714401158484, + "learning_rate": 7.072417556073489e-06, + "loss": 0.5662, "step": 5398 }, { - "epoch": 0.57, - "grad_norm": 2.3654720115387655, - "learning_rate": 4.144442820555782e-06, - "loss": 0.6418, + "epoch": 0.38, + "grad_norm": 1.5640081949964852, + "learning_rate": 7.071371694109179e-06, + "loss": 0.5498, "step": 5399 }, { - "epoch": 0.57, - "grad_norm": 2.179581313117845, - "learning_rate": 4.142763766757292e-06, - "loss": 0.635, + "epoch": 0.38, + "grad_norm": 2.1332356840745352, + "learning_rate": 7.070325722727998e-06, + "loss": 0.5504, "step": 5400 }, { - "epoch": 0.57, - "grad_norm": 2.2479561580681238, - "learning_rate": 4.141084812550031e-06, - "loss": 0.6461, + "epoch": 0.38, + "grad_norm": 1.3997797829264835, + "learning_rate": 7.069279641985197e-06, + "loss": 0.5513, "step": 5401 }, { - "epoch": 0.57, - "grad_norm": 2.841191478849272, - "learning_rate": 4.139405958129053e-06, - "loss": 0.6007, + "epoch": 0.38, + "grad_norm": 1.368895076210583, + "learning_rate": 7.068233451936035e-06, + "loss": 0.5325, "step": 5402 }, { - "epoch": 0.57, - "grad_norm": 2.0859232520906836, - "learning_rate": 4.137727203689402e-06, - "loss": 0.5766, + "epoch": 0.38, + "grad_norm": 2.107812015689986, + "learning_rate": 7.067187152635773e-06, + "loss": 0.5723, "step": 5403 }, { - "epoch": 0.57, - "grad_norm": 3.3485825067338117, - "learning_rate": 4.136048549426112e-06, - "loss": 0.6512, + "epoch": 0.38, + "grad_norm": 1.3505017663092647, + "learning_rate": 7.06614074413968e-06, + "loss": 0.5201, "step": 5404 }, { - "epoch": 0.57, - "grad_norm": 2.49496623222408, - "learning_rate": 4.134369995534206e-06, - "loss": 0.6146, + "epoch": 0.38, + "grad_norm": 1.542658168469199, + "learning_rate": 7.065094226503033e-06, + "loss": 0.5062, "step": 5405 }, { - "epoch": 0.57, - "grad_norm": 2.324007129830475, - "learning_rate": 4.132691542208691e-06, - "loss": 0.6011, + "epoch": 0.38, + "grad_norm": 1.480796787026757, + "learning_rate": 7.064047599781111e-06, + "loss": 0.5287, "step": 5406 }, { - "epoch": 0.57, - "grad_norm": 1.0793244375392315, - "learning_rate": 4.1310131896445635e-06, - "loss": 0.5288, + "epoch": 0.38, + "grad_norm": 1.6250131829381875, + "learning_rate": 7.0630008640292e-06, + "loss": 0.5999, "step": 5407 }, { - "epoch": 0.57, - "grad_norm": 3.2583878078923725, - "learning_rate": 4.129334938036813e-06, - "loss": 0.6705, + "epoch": 0.38, + "grad_norm": 1.7787521293516744, + "learning_rate": 7.061954019302594e-06, + "loss": 0.561, "step": 5408 }, { - "epoch": 0.57, - "grad_norm": 2.307137719361831, - "learning_rate": 4.127656787580412e-06, - "loss": 0.6923, + "epoch": 0.38, + "grad_norm": 1.595610059475579, + "learning_rate": 7.0609070656565904e-06, + "loss": 0.5479, "step": 5409 }, { - "epoch": 0.57, - "grad_norm": 2.578677072913258, - "learning_rate": 4.125978738470326e-06, - "loss": 0.6375, + "epoch": 0.38, + "grad_norm": 1.6561758632514592, + "learning_rate": 7.05986000314649e-06, + "loss": 0.5181, "step": 5410 }, { - "epoch": 0.57, - "grad_norm": 2.4643009262796918, - "learning_rate": 4.1243007909015016e-06, - "loss": 0.5542, + "epoch": 0.38, + "grad_norm": 1.766020191723996, + "learning_rate": 7.0588128318276085e-06, + "loss": 0.5352, "step": 5411 }, { - "epoch": 0.57, - "grad_norm": 2.2146931372972523, - "learning_rate": 4.1226229450688805e-06, - "loss": 0.6535, + "epoch": 0.38, + "grad_norm": 1.4630035742998104, + "learning_rate": 7.057765551755253e-06, + "loss": 0.5711, "step": 5412 }, { - "epoch": 0.57, - "grad_norm": 2.119950795155613, - "learning_rate": 4.120945201167388e-06, - "loss": 0.6085, + "epoch": 0.38, + "grad_norm": 1.528601663719652, + "learning_rate": 7.056718162984751e-06, + "loss": 0.5465, "step": 5413 }, { - "epoch": 0.57, - "grad_norm": 3.5062101551407485, - "learning_rate": 4.119267559391944e-06, - "loss": 0.6207, + "epoch": 0.38, + "grad_norm": 1.5523603250059257, + "learning_rate": 7.055670665571426e-06, + "loss": 0.5764, "step": 5414 }, { - "epoch": 0.57, - "grad_norm": 1.8128176772340931, - "learning_rate": 4.117590019937447e-06, - "loss": 0.5534, + "epoch": 0.38, + "grad_norm": 1.8216454465139897, + "learning_rate": 7.054623059570609e-06, + "loss": 0.5488, "step": 5415 }, { - "epoch": 0.57, - "grad_norm": 2.4544616665994945, - "learning_rate": 4.11591258299879e-06, - "loss": 0.664, + "epoch": 0.38, + "grad_norm": 1.9944500196167971, + "learning_rate": 7.053575345037641e-06, + "loss": 0.5448, "step": 5416 }, { - "epoch": 0.57, - "grad_norm": 3.5069164956287646, - "learning_rate": 4.114235248770854e-06, - "loss": 0.6393, + "epoch": 0.38, + "grad_norm": 1.9158048313881042, + "learning_rate": 7.052527522027865e-06, + "loss": 0.5853, "step": 5417 }, { - "epoch": 0.57, - "grad_norm": 2.7344010000318297, - "learning_rate": 4.112558017448508e-06, - "loss": 0.6276, + "epoch": 0.38, + "grad_norm": 0.8153086497287203, + "learning_rate": 7.051479590596631e-06, + "loss": 0.4677, "step": 5418 }, { - "epoch": 0.57, - "grad_norm": 3.5983760316820343, - "learning_rate": 4.1108808892266045e-06, - "loss": 0.6458, + "epoch": 0.38, + "grad_norm": 1.5920921623275361, + "learning_rate": 7.050431550799292e-06, + "loss": 0.4914, "step": 5419 }, { - "epoch": 0.57, - "grad_norm": 2.8082114352695724, - "learning_rate": 4.109203864299989e-06, - "loss": 0.5609, + "epoch": 0.38, + "grad_norm": 1.7123158238680598, + "learning_rate": 7.049383402691215e-06, + "loss": 0.564, "step": 5420 }, { - "epoch": 0.57, - "grad_norm": 2.364880898558155, - "learning_rate": 4.107526942863493e-06, - "loss": 0.7066, + "epoch": 0.38, + "grad_norm": 2.254198189766179, + "learning_rate": 7.048335146327759e-06, + "loss": 0.5767, "step": 5421 }, { - "epoch": 0.57, - "grad_norm": 1.9084674442912224, - "learning_rate": 4.105850125111937e-06, - "loss": 0.613, + "epoch": 0.38, + "grad_norm": 2.0337509090957266, + "learning_rate": 7.047286781764299e-06, + "loss": 0.661, "step": 5422 }, { - "epoch": 0.57, - "grad_norm": 1.039149108932299, - "learning_rate": 4.104173411240131e-06, - "loss": 0.5501, + "epoch": 0.38, + "grad_norm": 1.4887659328602472, + "learning_rate": 7.0462383090562145e-06, + "loss": 0.6186, "step": 5423 }, { - "epoch": 0.57, - "grad_norm": 2.760756741136062, - "learning_rate": 4.102496801442868e-06, - "loss": 0.7026, + "epoch": 0.38, + "grad_norm": 1.4902483318161455, + "learning_rate": 7.04518972825889e-06, + "loss": 0.5226, "step": 5424 }, { - "epoch": 0.57, - "grad_norm": 2.2973399753902295, - "learning_rate": 4.10082029591493e-06, - "loss": 0.643, + "epoch": 0.38, + "grad_norm": 1.6453592156391383, + "learning_rate": 7.044141039427713e-06, + "loss": 0.5431, "step": 5425 }, { - "epoch": 0.57, - "grad_norm": 2.5509810297461826, - "learning_rate": 4.099143894851092e-06, - "loss": 0.5958, + "epoch": 0.39, + "grad_norm": 1.6838534465345063, + "learning_rate": 7.043092242618082e-06, + "loss": 0.5274, "step": 5426 }, { - "epoch": 0.57, - "grad_norm": 1.0570994896608783, - "learning_rate": 4.097467598446113e-06, - "loss": 0.5708, + "epoch": 0.39, + "grad_norm": 1.453790897308789, + "learning_rate": 7.042043337885394e-06, + "loss": 0.537, "step": 5427 }, { - "epoch": 0.57, - "grad_norm": 2.0781495093655433, - "learning_rate": 4.095791406894739e-06, - "loss": 0.5697, + "epoch": 0.39, + "grad_norm": 1.7630412595206904, + "learning_rate": 7.0409943252850576e-06, + "loss": 0.5925, "step": 5428 }, { - "epoch": 0.57, - "grad_norm": 2.46415369130286, - "learning_rate": 4.094115320391704e-06, - "loss": 0.6917, + "epoch": 0.39, + "grad_norm": 1.8625421966308713, + "learning_rate": 7.039945204872484e-06, + "loss": 0.5401, "step": 5429 }, { - "epoch": 0.57, - "grad_norm": 2.1679299121769535, - "learning_rate": 4.0924393391317344e-06, - "loss": 0.6069, + "epoch": 0.39, + "grad_norm": 1.8192065037919496, + "learning_rate": 7.038895976703094e-06, + "loss": 0.5381, "step": 5430 }, { - "epoch": 0.57, - "grad_norm": 1.0173189356350105, - "learning_rate": 4.090763463309536e-06, - "loss": 0.5557, + "epoch": 0.39, + "grad_norm": 3.2591247971273485, + "learning_rate": 7.0378466408323085e-06, + "loss": 0.5347, "step": 5431 }, { - "epoch": 0.57, - "grad_norm": 2.570745562650635, - "learning_rate": 4.089087693119815e-06, - "loss": 0.6819, + "epoch": 0.39, + "grad_norm": 1.8032833977948368, + "learning_rate": 7.036797197315561e-06, + "loss": 0.5001, "step": 5432 }, { - "epoch": 0.57, - "grad_norm": 2.7474195051489185, - "learning_rate": 4.087412028757249e-06, - "loss": 0.621, + "epoch": 0.39, + "grad_norm": 1.970598019951489, + "learning_rate": 7.035747646208282e-06, + "loss": 0.5926, "step": 5433 }, { - "epoch": 0.57, - "grad_norm": 1.9523833561555928, - "learning_rate": 4.085736470416517e-06, - "loss": 0.6772, + "epoch": 0.39, + "grad_norm": 1.5723639009509216, + "learning_rate": 7.034697987565915e-06, + "loss": 0.5344, "step": 5434 }, { - "epoch": 0.57, - "grad_norm": 2.4968268241742986, - "learning_rate": 4.084061018292277e-06, - "loss": 0.6174, + "epoch": 0.39, + "grad_norm": 1.6946692704906028, + "learning_rate": 7.033648221443905e-06, + "loss": 0.5648, "step": 5435 }, { - "epoch": 0.57, - "grad_norm": 3.53792252658528, - "learning_rate": 4.082385672579182e-06, - "loss": 0.637, + "epoch": 0.39, + "grad_norm": 1.6972713563028743, + "learning_rate": 7.032598347897704e-06, + "loss": 0.5728, "step": 5436 }, { - "epoch": 0.57, - "grad_norm": 2.964810455872569, - "learning_rate": 4.0807104334718674e-06, - "loss": 0.6321, + "epoch": 0.39, + "grad_norm": 1.6898129461033558, + "learning_rate": 7.031548366982772e-06, + "loss": 0.5625, "step": 5437 }, { - "epoch": 0.57, - "grad_norm": 2.2794226187051545, - "learning_rate": 4.079035301164955e-06, - "loss": 0.5753, + "epoch": 0.39, + "grad_norm": 1.853672325246368, + "learning_rate": 7.0304982787545714e-06, + "loss": 0.5577, "step": 5438 }, { - "epoch": 0.57, - "grad_norm": 2.488542771422994, - "learning_rate": 4.0773602758530606e-06, - "loss": 0.6145, + "epoch": 0.39, + "grad_norm": 3.6317568198218466, + "learning_rate": 7.029448083268572e-06, + "loss": 0.6588, "step": 5439 }, { - "epoch": 0.57, - "grad_norm": 2.149185123490166, - "learning_rate": 4.0756853577307835e-06, - "loss": 0.7298, + "epoch": 0.39, + "grad_norm": 1.4887429680781104, + "learning_rate": 7.0283977805802475e-06, + "loss": 0.4885, "step": 5440 }, { - "epoch": 0.57, - "grad_norm": 19.446457701330363, - "learning_rate": 4.0740105469927084e-06, - "loss": 0.6299, + "epoch": 0.39, + "grad_norm": 1.56162197660061, + "learning_rate": 7.027347370745079e-06, + "loss": 0.5543, "step": 5441 }, { - "epoch": 0.57, - "grad_norm": 2.921362940815699, - "learning_rate": 4.07233584383341e-06, - "loss": 0.5932, + "epoch": 0.39, + "grad_norm": 2.1858434992828384, + "learning_rate": 7.026296853818553e-06, + "loss": 0.5633, "step": 5442 }, { - "epoch": 0.57, - "grad_norm": 2.436933837657758, - "learning_rate": 4.070661248447453e-06, - "loss": 0.6107, + "epoch": 0.39, + "grad_norm": 2.0119486941507585, + "learning_rate": 7.025246229856162e-06, + "loss": 0.5676, "step": 5443 }, { - "epoch": 0.57, - "grad_norm": 2.4936425865614624, - "learning_rate": 4.0689867610293845e-06, - "loss": 0.6755, + "epoch": 0.39, + "grad_norm": 0.7734841292009952, + "learning_rate": 7.0241954989134005e-06, + "loss": 0.4502, "step": 5444 }, { - "epoch": 0.57, - "grad_norm": 2.3563232675472374, - "learning_rate": 4.067312381773744e-06, - "loss": 0.5669, + "epoch": 0.39, + "grad_norm": 1.4912591003079245, + "learning_rate": 7.023144661045777e-06, + "loss": 0.546, "step": 5445 }, { - "epoch": 0.57, - "grad_norm": 5.811216819647008, - "learning_rate": 4.065638110875055e-06, - "loss": 0.6642, + "epoch": 0.39, + "grad_norm": 1.6046790966748061, + "learning_rate": 7.022093716308794e-06, + "loss": 0.5443, "step": 5446 }, { - "epoch": 0.57, - "grad_norm": 2.9029408699267543, - "learning_rate": 4.063963948527829e-06, - "loss": 0.6854, + "epoch": 0.39, + "grad_norm": 1.5910788472280488, + "learning_rate": 7.021042664757971e-06, + "loss": 0.5418, "step": 5447 }, { - "epoch": 0.57, - "grad_norm": 2.3885007406079173, - "learning_rate": 4.062289894926564e-06, - "loss": 0.6045, + "epoch": 0.39, + "grad_norm": 1.6559051280972505, + "learning_rate": 7.019991506448826e-06, + "loss": 0.5739, "step": 5448 }, { - "epoch": 0.57, - "grad_norm": 2.5249267659429764, - "learning_rate": 4.060615950265752e-06, - "loss": 0.7814, + "epoch": 0.39, + "grad_norm": 1.8186744270694613, + "learning_rate": 7.018940241436885e-06, + "loss": 0.5579, "step": 5449 }, { - "epoch": 0.57, - "grad_norm": 2.5576000799315537, - "learning_rate": 4.058942114739861e-06, - "loss": 0.6392, + "epoch": 0.39, + "grad_norm": 1.4887910428171838, + "learning_rate": 7.017888869777678e-06, + "loss": 0.5121, "step": 5450 }, { - "epoch": 0.57, - "grad_norm": 2.340626814388993, - "learning_rate": 4.057268388543354e-06, - "loss": 0.6515, + "epoch": 0.39, + "grad_norm": 1.7358732418637193, + "learning_rate": 7.016837391526745e-06, + "loss": 0.5901, "step": 5451 }, { - "epoch": 0.57, - "grad_norm": 2.366975049960535, - "learning_rate": 4.055594771870682e-06, - "loss": 0.5461, + "epoch": 0.39, + "grad_norm": 1.7320886334294041, + "learning_rate": 7.015785806739626e-06, + "loss": 0.5641, "step": 5452 }, { - "epoch": 0.57, - "grad_norm": 3.5250602391277406, - "learning_rate": 4.05392126491628e-06, - "loss": 0.6166, + "epoch": 0.39, + "grad_norm": 1.652100257570366, + "learning_rate": 7.0147341154718695e-06, + "loss": 0.5819, "step": 5453 }, { - "epoch": 0.57, - "grad_norm": 2.7710266206375826, - "learning_rate": 4.052247867874569e-06, - "loss": 0.5996, + "epoch": 0.39, + "grad_norm": 1.7959906429772756, + "learning_rate": 7.013682317779033e-06, + "loss": 0.5651, "step": 5454 }, { - "epoch": 0.57, - "grad_norm": 2.5532961974178727, - "learning_rate": 4.050574580939961e-06, - "loss": 0.6165, + "epoch": 0.39, + "grad_norm": 1.6544727742800043, + "learning_rate": 7.01263041371667e-06, + "loss": 0.5427, "step": 5455 }, { - "epoch": 0.57, - "grad_norm": 4.2749550989913345, - "learning_rate": 4.0489014043068545e-06, - "loss": 0.6004, + "epoch": 0.39, + "grad_norm": 1.7318964442697327, + "learning_rate": 7.011578403340351e-06, + "loss": 0.5632, "step": 5456 }, { - "epoch": 0.57, - "grad_norm": 2.131553043318907, - "learning_rate": 4.047228338169632e-06, - "loss": 0.6498, + "epoch": 0.39, + "grad_norm": 1.7519479590267515, + "learning_rate": 7.0105262867056436e-06, + "loss": 0.5736, "step": 5457 }, { - "epoch": 0.57, - "grad_norm": 2.574736501197967, - "learning_rate": 4.04555538272267e-06, - "loss": 0.5856, + "epoch": 0.39, + "grad_norm": 1.727841791461665, + "learning_rate": 7.009474063868126e-06, + "loss": 0.5645, "step": 5458 }, { - "epoch": 0.57, - "grad_norm": 2.1788021789713112, - "learning_rate": 4.0438825381603225e-06, - "loss": 0.6355, + "epoch": 0.39, + "grad_norm": 4.614251345014641, + "learning_rate": 7.008421734883378e-06, + "loss": 0.5001, "step": 5459 }, { - "epoch": 0.57, - "grad_norm": 3.4984416537675296, - "learning_rate": 4.042209804676937e-06, - "loss": 0.6518, + "epoch": 0.39, + "grad_norm": 1.523678192367452, + "learning_rate": 7.00736929980699e-06, + "loss": 0.5142, "step": 5460 }, { - "epoch": 0.57, - "grad_norm": 2.5902715960904383, - "learning_rate": 4.040537182466849e-06, - "loss": 0.538, + "epoch": 0.39, + "grad_norm": 0.8428398596835543, + "learning_rate": 7.006316758694553e-06, + "loss": 0.466, "step": 5461 }, { - "epoch": 0.57, - "grad_norm": 3.1931566795393427, - "learning_rate": 4.038864671724379e-06, - "loss": 0.6478, + "epoch": 0.39, + "grad_norm": 1.6524942586247529, + "learning_rate": 7.005264111601667e-06, + "loss": 0.5671, "step": 5462 }, { - "epoch": 0.57, - "grad_norm": 2.817321693238749, - "learning_rate": 4.0371922726438314e-06, - "loss": 0.6962, + "epoch": 0.39, + "grad_norm": 9.54633944524786, + "learning_rate": 7.004211358583934e-06, + "loss": 0.5291, "step": 5463 }, { - "epoch": 0.57, - "grad_norm": 2.1447873012073355, - "learning_rate": 4.035519985419502e-06, - "loss": 0.5852, + "epoch": 0.39, + "grad_norm": 1.566651070707878, + "learning_rate": 7.003158499696968e-06, + "loss": 0.6449, "step": 5464 }, { - "epoch": 0.58, - "grad_norm": 2.884660635376069, - "learning_rate": 4.033847810245673e-06, - "loss": 0.6715, + "epoch": 0.39, + "grad_norm": 1.4705532183831205, + "learning_rate": 7.002105534996382e-06, + "loss": 0.5324, "step": 5465 }, { - "epoch": 0.58, - "grad_norm": 3.4056554597314035, - "learning_rate": 4.0321757473166145e-06, - "loss": 0.6417, + "epoch": 0.39, + "grad_norm": 0.8638671651400454, + "learning_rate": 7.001052464537797e-06, + "loss": 0.4746, "step": 5466 }, { - "epoch": 0.58, - "grad_norm": 2.4034378137851213, - "learning_rate": 4.030503796826578e-06, - "loss": 0.6369, + "epoch": 0.39, + "grad_norm": 1.7290888193991074, + "learning_rate": 6.99999928837684e-06, + "loss": 0.5174, "step": 5467 }, { - "epoch": 0.58, - "grad_norm": 3.279509860886487, - "learning_rate": 4.028831958969807e-06, - "loss": 0.6686, + "epoch": 0.39, + "grad_norm": 1.736948815926927, + "learning_rate": 6.998946006569143e-06, + "loss": 0.6056, "step": 5468 }, { - "epoch": 0.58, - "grad_norm": 2.6768593400635177, - "learning_rate": 4.027160233940534e-06, - "loss": 0.6936, + "epoch": 0.39, + "grad_norm": 0.8498783917390376, + "learning_rate": 6.997892619170347e-06, + "loss": 0.4567, "step": 5469 }, { - "epoch": 0.58, - "grad_norm": 2.696418739353531, - "learning_rate": 4.02548862193297e-06, - "loss": 0.5815, + "epoch": 0.39, + "grad_norm": 1.843526462150231, + "learning_rate": 6.9968391262360904e-06, + "loss": 0.5648, "step": 5470 }, { - "epoch": 0.58, - "grad_norm": 2.8470170762266642, - "learning_rate": 4.023817123141324e-06, - "loss": 0.6268, + "epoch": 0.39, + "grad_norm": 1.7364243167559172, + "learning_rate": 6.995785527822027e-06, + "loss": 0.5954, "step": 5471 }, { - "epoch": 0.58, - "grad_norm": 2.3205700797742703, - "learning_rate": 4.022145737759781e-06, - "loss": 0.7149, + "epoch": 0.39, + "grad_norm": 1.6633747887723824, + "learning_rate": 6.994731823983809e-06, + "loss": 0.5226, "step": 5472 }, { - "epoch": 0.58, - "grad_norm": 2.4124617955340772, - "learning_rate": 4.020474465982519e-06, - "loss": 0.5885, + "epoch": 0.39, + "grad_norm": 1.7972758615268272, + "learning_rate": 6.9936780147770965e-06, + "loss": 0.484, "step": 5473 }, { - "epoch": 0.58, - "grad_norm": 2.4297597298078606, - "learning_rate": 4.0188033080037025e-06, - "loss": 0.632, + "epoch": 0.39, + "grad_norm": 1.5860602798796182, + "learning_rate": 6.992624100257554e-06, + "loss": 0.5588, "step": 5474 }, { - "epoch": 0.58, - "grad_norm": 2.7294892840221427, - "learning_rate": 4.017132264017483e-06, - "loss": 0.6413, + "epoch": 0.39, + "grad_norm": 0.7227461699474015, + "learning_rate": 6.991570080480858e-06, + "loss": 0.4557, "step": 5475 }, { - "epoch": 0.58, - "grad_norm": 1.0863421441237322, - "learning_rate": 4.015461334217995e-06, - "loss": 0.5971, + "epoch": 0.39, + "grad_norm": 1.8169240097253545, + "learning_rate": 6.99051595550268e-06, + "loss": 0.5898, "step": 5476 }, { - "epoch": 0.58, - "grad_norm": 2.5430832765858735, - "learning_rate": 4.013790518799361e-06, - "loss": 0.6309, + "epoch": 0.39, + "grad_norm": 1.7095440930807444, + "learning_rate": 6.989461725378706e-06, + "loss": 0.5597, "step": 5477 }, { - "epoch": 0.58, - "grad_norm": 2.900637212310984, - "learning_rate": 4.012119817955696e-06, - "loss": 0.5844, + "epoch": 0.39, + "grad_norm": 1.686313521872057, + "learning_rate": 6.988407390164621e-06, + "loss": 0.596, "step": 5478 }, { - "epoch": 0.58, - "grad_norm": 2.1161956263445076, - "learning_rate": 4.010449231881093e-06, - "loss": 0.6315, + "epoch": 0.39, + "grad_norm": 1.6124814712994346, + "learning_rate": 6.987352949916122e-06, + "loss": 0.5123, "step": 5479 }, { - "epoch": 0.58, - "grad_norm": 2.31955950639972, - "learning_rate": 4.00877876076964e-06, - "loss": 0.64, + "epoch": 0.39, + "grad_norm": 1.982256340420072, + "learning_rate": 6.986298404688904e-06, + "loss": 0.6007, "step": 5480 }, { - "epoch": 0.58, - "grad_norm": 2.1236471054278256, - "learning_rate": 4.0071084048154044e-06, - "loss": 0.6076, + "epoch": 0.39, + "grad_norm": 0.9050196396945963, + "learning_rate": 6.985243754538673e-06, + "loss": 0.4563, "step": 5481 }, { - "epoch": 0.58, - "grad_norm": 4.246468971263171, - "learning_rate": 4.005438164212444e-06, - "loss": 0.6235, + "epoch": 0.39, + "grad_norm": 1.8733073725496132, + "learning_rate": 6.984188999521141e-06, + "loss": 0.6038, "step": 5482 }, { - "epoch": 0.58, - "grad_norm": 2.0097201604995623, - "learning_rate": 4.0037680391548015e-06, - "loss": 0.5754, + "epoch": 0.39, + "grad_norm": 2.166917911105936, + "learning_rate": 6.983134139692023e-06, + "loss": 0.5826, "step": 5483 }, { - "epoch": 0.58, - "grad_norm": 2.2344373050878206, - "learning_rate": 4.002098029836511e-06, - "loss": 0.6543, + "epoch": 0.39, + "grad_norm": 5.609614659715804, + "learning_rate": 6.98207917510704e-06, + "loss": 0.4393, "step": 5484 }, { - "epoch": 0.58, - "grad_norm": 2.9506649394699247, - "learning_rate": 4.000428136451585e-06, - "loss": 0.5946, + "epoch": 0.39, + "grad_norm": 1.5224036003214179, + "learning_rate": 6.981024105821919e-06, + "loss": 0.5401, "step": 5485 }, { - "epoch": 0.58, - "grad_norm": 5.350295409726678, - "learning_rate": 3.998758359194028e-06, - "loss": 0.5822, + "epoch": 0.39, + "grad_norm": 1.4636484106333545, + "learning_rate": 6.9799689318923926e-06, + "loss": 0.5496, "step": 5486 }, { - "epoch": 0.58, - "grad_norm": 2.4139769523797048, - "learning_rate": 3.9970886982578314e-06, - "loss": 0.6689, + "epoch": 0.39, + "grad_norm": 1.7596583410894917, + "learning_rate": 6.978913653374196e-06, + "loss": 0.5328, "step": 5487 }, { - "epoch": 0.58, - "grad_norm": 2.322280956259487, - "learning_rate": 3.995419153836972e-06, - "loss": 0.5373, + "epoch": 0.39, + "grad_norm": 1.7885507047466982, + "learning_rate": 6.977858270323077e-06, + "loss": 0.5423, "step": 5488 }, { - "epoch": 0.58, - "grad_norm": 2.334612127720499, - "learning_rate": 3.9937497261254114e-06, - "loss": 0.741, + "epoch": 0.39, + "grad_norm": 1.437732607005903, + "learning_rate": 6.97680278279478e-06, + "loss": 0.5405, "step": 5489 }, { - "epoch": 0.58, - "grad_norm": 2.6565331932796057, - "learning_rate": 3.992080415317096e-06, - "loss": 0.6195, + "epoch": 0.39, + "grad_norm": 1.518304009580643, + "learning_rate": 6.975747190845064e-06, + "loss": 0.5385, "step": 5490 }, { - "epoch": 0.58, - "grad_norm": 2.485017788078939, - "learning_rate": 3.9904112216059656e-06, - "loss": 0.5669, + "epoch": 0.39, + "grad_norm": 0.7171058273447316, + "learning_rate": 6.974691494529686e-06, + "loss": 0.453, "step": 5491 }, { - "epoch": 0.58, - "grad_norm": 3.189719358830894, - "learning_rate": 3.988742145185941e-06, - "loss": 0.6253, + "epoch": 0.39, + "grad_norm": 1.5387649864551098, + "learning_rate": 6.973635693904413e-06, + "loss": 0.5624, "step": 5492 }, { - "epoch": 0.58, - "grad_norm": 2.8776383714391742, - "learning_rate": 3.987073186250932e-06, - "loss": 0.6488, + "epoch": 0.39, + "grad_norm": 1.5425605459738025, + "learning_rate": 6.972579789025012e-06, + "loss": 0.5281, "step": 5493 }, { - "epoch": 0.58, - "grad_norm": 3.0757770946653458, - "learning_rate": 3.985404344994831e-06, - "loss": 0.7339, + "epoch": 0.39, + "grad_norm": 1.6020729739585984, + "learning_rate": 6.971523779947266e-06, + "loss": 0.5268, "step": 5494 }, { - "epoch": 0.58, - "grad_norm": 2.9151134884480556, - "learning_rate": 3.9837356216115205e-06, - "loss": 0.6524, + "epoch": 0.39, + "grad_norm": 1.725835405501458, + "learning_rate": 6.97046766672695e-06, + "loss": 0.5702, "step": 5495 }, { - "epoch": 0.58, - "grad_norm": 2.5361472839977455, - "learning_rate": 3.982067016294868e-06, - "loss": 0.6586, + "epoch": 0.39, + "grad_norm": 2.5992457408726932, + "learning_rate": 6.969411449419857e-06, + "loss": 0.5814, "step": 5496 }, { - "epoch": 0.58, - "grad_norm": 2.1819398494243964, - "learning_rate": 3.980398529238728e-06, - "loss": 0.6144, + "epoch": 0.39, + "grad_norm": 1.5659065974527402, + "learning_rate": 6.968355128081778e-06, + "loss": 0.5765, "step": 5497 }, { - "epoch": 0.58, - "grad_norm": 9.277473751575629, - "learning_rate": 3.978730160636938e-06, - "loss": 0.5899, + "epoch": 0.39, + "grad_norm": 1.6567854670617717, + "learning_rate": 6.967298702768512e-06, + "loss": 0.5008, "step": 5498 }, { - "epoch": 0.58, - "grad_norm": 4.865134875134504, - "learning_rate": 3.977061910683325e-06, - "loss": 0.6446, + "epoch": 0.39, + "grad_norm": 1.476791477215469, + "learning_rate": 6.966242173535861e-06, + "loss": 0.4922, "step": 5499 }, { - "epoch": 0.58, - "grad_norm": 2.0322930621616004, - "learning_rate": 3.975393779571704e-06, - "loss": 0.6317, + "epoch": 0.39, + "grad_norm": 1.5080019724613851, + "learning_rate": 6.965185540439636e-06, + "loss": 0.5337, "step": 5500 }, { - "epoch": 0.58, - "grad_norm": 2.853800658322435, - "learning_rate": 3.9737257674958714e-06, - "loss": 0.5481, + "epoch": 0.39, + "grad_norm": 1.6913522053754984, + "learning_rate": 6.964128803535654e-06, + "loss": 0.5867, "step": 5501 }, { - "epoch": 0.58, - "grad_norm": 0.9879336943874001, - "learning_rate": 3.972057874649613e-06, - "loss": 0.5534, + "epoch": 0.39, + "grad_norm": 2.205258827279319, + "learning_rate": 6.96307196287973e-06, + "loss": 0.6313, "step": 5502 }, { - "epoch": 0.58, - "grad_norm": 2.222678097797317, - "learning_rate": 3.970390101226697e-06, - "loss": 0.5955, + "epoch": 0.39, + "grad_norm": 1.7385492070742876, + "learning_rate": 6.962015018527694e-06, + "loss": 0.5833, "step": 5503 }, { - "epoch": 0.58, - "grad_norm": 2.269757412226817, - "learning_rate": 3.968722447420884e-06, - "loss": 0.6627, + "epoch": 0.39, + "grad_norm": 1.833472738520152, + "learning_rate": 6.960957970535378e-06, + "loss": 0.5426, "step": 5504 }, { - "epoch": 0.58, - "grad_norm": 4.009770633477226, - "learning_rate": 3.967054913425916e-06, - "loss": 0.6221, + "epoch": 0.39, + "grad_norm": 1.9253211182477978, + "learning_rate": 6.959900818958617e-06, + "loss": 0.5623, "step": 5505 }, { - "epoch": 0.58, - "grad_norm": 2.8457569192736396, - "learning_rate": 3.965387499435524e-06, - "loss": 0.6292, + "epoch": 0.39, + "grad_norm": 1.4237409443332223, + "learning_rate": 6.958843563853253e-06, + "loss": 0.5719, "step": 5506 }, { - "epoch": 0.58, - "grad_norm": 3.254292979448844, - "learning_rate": 3.963720205643419e-06, - "loss": 0.6363, + "epoch": 0.39, + "grad_norm": 2.5716754846812706, + "learning_rate": 6.957786205275135e-06, + "loss": 0.6139, "step": 5507 }, { - "epoch": 0.58, - "grad_norm": 2.300083482731035, - "learning_rate": 3.962053032243305e-06, - "loss": 0.6737, + "epoch": 0.39, + "grad_norm": 2.0062908247689517, + "learning_rate": 6.956728743280116e-06, + "loss": 0.5996, "step": 5508 }, { - "epoch": 0.58, - "grad_norm": 3.126068404526892, - "learning_rate": 3.96038597942887e-06, - "loss": 0.689, + "epoch": 0.39, + "grad_norm": 2.0080184173781803, + "learning_rate": 6.955671177924053e-06, + "loss": 0.5285, "step": 5509 }, { - "epoch": 0.58, - "grad_norm": 2.441809460166265, - "learning_rate": 3.958719047393789e-06, - "loss": 0.6798, + "epoch": 0.39, + "grad_norm": 0.7371978294127515, + "learning_rate": 6.954613509262812e-06, + "loss": 0.4415, "step": 5510 }, { - "epoch": 0.58, - "grad_norm": 2.8015689631584326, - "learning_rate": 3.9570522363317165e-06, - "loss": 0.6766, + "epoch": 0.39, + "grad_norm": 1.6778661898277907, + "learning_rate": 6.9535557373522645e-06, + "loss": 0.5302, "step": 5511 }, { - "epoch": 0.58, - "grad_norm": 1.8277968259678123, - "learning_rate": 3.955385546436299e-06, - "loss": 0.5986, + "epoch": 0.39, + "grad_norm": 1.7695826499461158, + "learning_rate": 6.952497862248281e-06, + "loss": 0.6118, "step": 5512 }, { - "epoch": 0.58, - "grad_norm": 2.335643392259737, - "learning_rate": 3.9537189779011715e-06, - "loss": 0.5452, + "epoch": 0.39, + "grad_norm": 1.6469923096326593, + "learning_rate": 6.951439884006745e-06, + "loss": 0.6109, "step": 5513 }, { - "epoch": 0.58, - "grad_norm": 2.3708189781396936, - "learning_rate": 3.952052530919948e-06, - "loss": 0.6386, + "epoch": 0.39, + "grad_norm": 2.347956152381896, + "learning_rate": 6.950381802683542e-06, + "loss": 0.5223, "step": 5514 }, { - "epoch": 0.58, - "grad_norm": 2.838879644566167, - "learning_rate": 3.9503862056862315e-06, - "loss": 0.7274, + "epoch": 0.39, + "grad_norm": 3.627441774934008, + "learning_rate": 6.949323618334563e-06, + "loss": 0.5319, "step": 5515 }, { - "epoch": 0.58, - "grad_norm": 2.4970962843979057, - "learning_rate": 3.948720002393613e-06, - "loss": 0.6018, + "epoch": 0.39, + "grad_norm": 1.619929019935426, + "learning_rate": 6.948265331015708e-06, + "loss": 0.5797, "step": 5516 }, { - "epoch": 0.58, - "grad_norm": 2.1528209154994675, - "learning_rate": 3.947053921235665e-06, - "loss": 0.6035, + "epoch": 0.39, + "grad_norm": 1.5233634071970434, + "learning_rate": 6.947206940782873e-06, + "loss": 0.5739, "step": 5517 }, { - "epoch": 0.58, - "grad_norm": 3.223370620526591, - "learning_rate": 3.945387962405946e-06, - "loss": 0.6433, + "epoch": 0.39, + "grad_norm": 1.6489499093897342, + "learning_rate": 6.946148447691971e-06, + "loss": 0.6164, "step": 5518 }, { - "epoch": 0.58, - "grad_norm": 3.583378068602465, - "learning_rate": 3.943722126098009e-06, - "loss": 0.5599, + "epoch": 0.39, + "grad_norm": 2.061217928735542, + "learning_rate": 6.9450898517989125e-06, + "loss": 0.5913, "step": 5519 }, { - "epoch": 0.58, - "grad_norm": 2.934321798163941, - "learning_rate": 3.94205641250538e-06, - "loss": 0.7043, + "epoch": 0.39, + "grad_norm": 1.5630271619513867, + "learning_rate": 6.94403115315962e-06, + "loss": 0.4685, "step": 5520 }, { - "epoch": 0.58, - "grad_norm": 2.7272967348881823, - "learning_rate": 3.940390821821579e-06, - "loss": 0.6763, + "epoch": 0.39, + "grad_norm": 1.759024327821406, + "learning_rate": 6.9429723518300114e-06, + "loss": 0.5919, "step": 5521 }, { - "epoch": 0.58, - "grad_norm": 2.4185559104349137, - "learning_rate": 3.93872535424011e-06, - "loss": 0.6508, + "epoch": 0.39, + "grad_norm": 1.4861896090036453, + "learning_rate": 6.941913447866022e-06, + "loss": 0.5001, "step": 5522 }, { - "epoch": 0.58, - "grad_norm": 2.4863515025722562, - "learning_rate": 3.937060009954462e-06, - "loss": 0.6582, + "epoch": 0.39, + "grad_norm": 1.7021432220560504, + "learning_rate": 6.940854441323582e-06, + "loss": 0.625, "step": 5523 }, { - "epoch": 0.58, - "grad_norm": 2.89940109019646, - "learning_rate": 3.935394789158108e-06, - "loss": 0.6821, + "epoch": 0.39, + "grad_norm": 2.335887201077733, + "learning_rate": 6.939795332258635e-06, + "loss": 0.5378, "step": 5524 }, { - "epoch": 0.58, - "grad_norm": 2.850326326013693, - "learning_rate": 3.93372969204451e-06, - "loss": 0.5794, + "epoch": 0.39, + "grad_norm": 1.89099725927641, + "learning_rate": 6.938736120727125e-06, + "loss": 0.6076, "step": 5525 }, { - "epoch": 0.58, - "grad_norm": 3.1372923087151254, - "learning_rate": 3.932064718807114e-06, - "loss": 0.6803, + "epoch": 0.39, + "grad_norm": 1.5652230938459508, + "learning_rate": 6.937676806785005e-06, + "loss": 0.5307, "step": 5526 }, { - "epoch": 0.58, - "grad_norm": 2.0327932638867807, - "learning_rate": 3.930399869639353e-06, - "loss": 0.6513, + "epoch": 0.39, + "grad_norm": 1.6515498144714036, + "learning_rate": 6.936617390488229e-06, + "loss": 0.608, "step": 5527 }, { - "epoch": 0.58, - "grad_norm": 3.519770785227865, - "learning_rate": 3.9287351447346424e-06, - "loss": 0.6086, + "epoch": 0.39, + "grad_norm": 1.493605197873879, + "learning_rate": 6.935557871892762e-06, + "loss": 0.5882, "step": 5528 }, { - "epoch": 0.58, - "grad_norm": 1.0115479234131517, - "learning_rate": 3.927070544286385e-06, - "loss": 0.5529, + "epoch": 0.39, + "grad_norm": 1.8378243332029514, + "learning_rate": 6.934498251054569e-06, + "loss": 0.6378, "step": 5529 }, { - "epoch": 0.58, - "grad_norm": 1.0358399237096076, - "learning_rate": 3.925406068487972e-06, - "loss": 0.5233, + "epoch": 0.39, + "grad_norm": 2.024296820190803, + "learning_rate": 6.933438528029624e-06, + "loss": 0.5648, "step": 5530 }, { - "epoch": 0.58, - "grad_norm": 2.7457465246262487, - "learning_rate": 3.923741717532774e-06, - "loss": 0.6148, + "epoch": 0.39, + "grad_norm": 1.6553114656221715, + "learning_rate": 6.932378702873902e-06, + "loss": 0.5656, "step": 5531 }, { - "epoch": 0.58, - "grad_norm": 2.7191030649642522, - "learning_rate": 3.922077491614155e-06, - "loss": 0.653, + "epoch": 0.39, + "grad_norm": 1.835926655970203, + "learning_rate": 6.931318775643389e-06, + "loss": 0.6075, "step": 5532 }, { - "epoch": 0.58, - "grad_norm": 2.914553835743117, - "learning_rate": 3.920413390925454e-06, - "loss": 0.6825, + "epoch": 0.39, + "grad_norm": 1.6242146515764637, + "learning_rate": 6.930258746394077e-06, + "loss": 0.5741, "step": 5533 }, { - "epoch": 0.58, - "grad_norm": 2.2209209916064294, - "learning_rate": 3.918749415660005e-06, - "loss": 0.6485, + "epoch": 0.39, + "grad_norm": 1.8055460852708354, + "learning_rate": 6.929198615181956e-06, + "loss": 0.6057, "step": 5534 }, { - "epoch": 0.58, - "grad_norm": 2.6768274712210633, - "learning_rate": 3.917085566011124e-06, - "loss": 0.6383, + "epoch": 0.39, + "grad_norm": 1.83730243318912, + "learning_rate": 6.928138382063028e-06, + "loss": 0.564, "step": 5535 }, { - "epoch": 0.58, - "grad_norm": 2.07522859990918, - "learning_rate": 3.915421842172113e-06, - "loss": 0.6638, + "epoch": 0.39, + "grad_norm": 1.5367323898398595, + "learning_rate": 6.927078047093295e-06, + "loss": 0.523, "step": 5536 }, { - "epoch": 0.58, - "grad_norm": 2.4434649901286885, - "learning_rate": 3.913758244336255e-06, - "loss": 0.6297, + "epoch": 0.39, + "grad_norm": 0.7736651132226489, + "learning_rate": 6.926017610328772e-06, + "loss": 0.4768, "step": 5537 }, { - "epoch": 0.58, - "grad_norm": 3.82081715284726, - "learning_rate": 3.912094772696825e-06, - "loss": 0.6508, + "epoch": 0.39, + "grad_norm": 2.1678610537479277, + "learning_rate": 6.924957071825471e-06, + "loss": 0.5637, "step": 5538 }, { - "epoch": 0.58, - "grad_norm": 2.2583424490630546, - "learning_rate": 3.910431427447079e-06, - "loss": 0.5839, + "epoch": 0.39, + "grad_norm": 1.7923049757878826, + "learning_rate": 6.923896431639416e-06, + "loss": 0.5609, "step": 5539 }, { - "epoch": 0.58, - "grad_norm": 4.663703287673878, - "learning_rate": 3.908768208780259e-06, - "loss": 0.5589, + "epoch": 0.39, + "grad_norm": 8.208093182543358, + "learning_rate": 6.9228356898266315e-06, + "loss": 0.5766, "step": 5540 }, { - "epoch": 0.58, - "grad_norm": 2.274839976109476, - "learning_rate": 3.907105116889597e-06, - "loss": 0.6395, + "epoch": 0.39, + "grad_norm": 1.4134743765794109, + "learning_rate": 6.921774846443152e-06, + "loss": 0.5258, "step": 5541 }, { - "epoch": 0.58, - "grad_norm": 2.4342153540549925, - "learning_rate": 3.905442151968302e-06, - "loss": 0.6024, + "epoch": 0.39, + "grad_norm": 1.7137945410198139, + "learning_rate": 6.920713901545012e-06, + "loss": 0.6041, "step": 5542 }, { - "epoch": 0.58, - "grad_norm": 3.4254973540309996, - "learning_rate": 3.903779314209573e-06, - "loss": 0.612, + "epoch": 0.39, + "grad_norm": 0.7897668901644281, + "learning_rate": 6.919652855188255e-06, + "loss": 0.4498, "step": 5543 }, { - "epoch": 0.58, - "grad_norm": 2.9209988262515627, - "learning_rate": 3.902116603806594e-06, - "loss": 0.635, + "epoch": 0.39, + "grad_norm": 1.5531551614970929, + "learning_rate": 6.918591707428931e-06, + "loss": 0.591, "step": 5544 }, { - "epoch": 0.58, - "grad_norm": 2.5257899785531785, - "learning_rate": 3.900454020952537e-06, - "loss": 0.6775, + "epoch": 0.39, + "grad_norm": 1.6112693402794336, + "learning_rate": 6.917530458323092e-06, + "loss": 0.5709, "step": 5545 }, { - "epoch": 0.58, - "grad_norm": 2.231536697694151, - "learning_rate": 3.898791565840552e-06, - "loss": 0.6288, + "epoch": 0.39, + "grad_norm": 1.4587945730890088, + "learning_rate": 6.916469107926795e-06, + "loss": 0.5142, "step": 5546 }, { - "epoch": 0.58, - "grad_norm": 2.7797880961916452, - "learning_rate": 3.897129238663777e-06, - "loss": 0.5785, + "epoch": 0.39, + "grad_norm": 1.7356511428978043, + "learning_rate": 6.915407656296105e-06, + "loss": 0.5623, "step": 5547 }, { - "epoch": 0.58, - "grad_norm": 2.1833271590239662, - "learning_rate": 3.895467039615342e-06, - "loss": 0.6688, + "epoch": 0.39, + "grad_norm": 0.7955653494262135, + "learning_rate": 6.914346103487095e-06, + "loss": 0.4467, "step": 5548 }, { - "epoch": 0.58, - "grad_norm": 3.5023552603785757, - "learning_rate": 3.893804968888354e-06, - "loss": 0.5951, + "epoch": 0.39, + "grad_norm": 1.5441931072619968, + "learning_rate": 6.913284449555835e-06, + "loss": 0.5491, "step": 5549 }, { - "epoch": 0.58, - "grad_norm": 2.1779919766332645, - "learning_rate": 3.892143026675905e-06, - "loss": 0.6629, + "epoch": 0.39, + "grad_norm": 2.1256322733340283, + "learning_rate": 6.912222694558409e-06, + "loss": 0.5526, "step": 5550 }, { - "epoch": 0.58, - "grad_norm": 2.937739628950341, - "learning_rate": 3.8904812131710776e-06, - "loss": 0.6831, + "epoch": 0.39, + "grad_norm": 1.8070476473226875, + "learning_rate": 6.911160838550901e-06, + "loss": 0.5505, "step": 5551 }, { - "epoch": 0.58, - "grad_norm": 2.7094582537792893, - "learning_rate": 3.888819528566935e-06, - "loss": 0.6519, + "epoch": 0.39, + "grad_norm": 2.1615752537695063, + "learning_rate": 6.910098881589401e-06, + "loss": 0.5172, "step": 5552 }, { - "epoch": 0.58, - "grad_norm": 2.507846503527637, - "learning_rate": 3.8871579730565265e-06, - "loss": 0.6874, + "epoch": 0.39, + "grad_norm": 1.490696491603564, + "learning_rate": 6.909036823730004e-06, + "loss": 0.5499, "step": 5553 }, { - "epoch": 0.58, - "grad_norm": 2.3128367049716334, - "learning_rate": 3.885496546832891e-06, - "loss": 0.6468, + "epoch": 0.39, + "grad_norm": 1.9077516476450789, + "learning_rate": 6.907974665028816e-06, + "loss": 0.5238, "step": 5554 }, { - "epoch": 0.58, - "grad_norm": 2.18788284410742, - "learning_rate": 3.883835250089043e-06, - "loss": 0.608, + "epoch": 0.39, + "grad_norm": 1.4640424149452718, + "learning_rate": 6.906912405541939e-06, + "loss": 0.4942, "step": 5555 }, { - "epoch": 0.58, - "grad_norm": 3.0367412519509323, - "learning_rate": 3.8821740830179876e-06, - "loss": 0.6377, + "epoch": 0.39, + "grad_norm": 1.8308303948325417, + "learning_rate": 6.905850045325489e-06, + "loss": 0.5354, "step": 5556 }, { - "epoch": 0.58, - "grad_norm": 2.587808272946125, - "learning_rate": 3.880513045812718e-06, - "loss": 0.6709, + "epoch": 0.39, + "grad_norm": 1.7732798774369198, + "learning_rate": 6.90478758443558e-06, + "loss": 0.6156, "step": 5557 }, { - "epoch": 0.58, - "grad_norm": 2.8752893486354014, - "learning_rate": 3.8788521386662076e-06, - "loss": 0.6109, + "epoch": 0.39, + "grad_norm": 1.5818843559724487, + "learning_rate": 6.903725022928338e-06, + "loss": 0.5484, "step": 5558 }, { - "epoch": 0.58, - "grad_norm": 5.853722055103752, - "learning_rate": 3.8771913617714135e-06, - "loss": 0.5964, + "epoch": 0.39, + "grad_norm": 1.7172924597250256, + "learning_rate": 6.902662360859888e-06, + "loss": 0.5618, "step": 5559 }, { - "epoch": 0.59, - "grad_norm": 1.0139273382638472, - "learning_rate": 3.87553071532128e-06, - "loss": 0.57, + "epoch": 0.39, + "grad_norm": 1.6709906481632917, + "learning_rate": 6.901599598286367e-06, + "loss": 0.5384, "step": 5560 }, { - "epoch": 0.59, - "grad_norm": 9.753533947661753, - "learning_rate": 3.873870199508739e-06, - "loss": 0.6884, + "epoch": 0.39, + "grad_norm": 1.6938163143135139, + "learning_rate": 6.900536735263909e-06, + "loss": 0.6494, "step": 5561 }, { - "epoch": 0.59, - "grad_norm": 2.59431286674271, - "learning_rate": 3.872209814526703e-06, - "loss": 0.6489, + "epoch": 0.39, + "grad_norm": 1.7156430100369624, + "learning_rate": 6.899473771848664e-06, + "loss": 0.5727, "step": 5562 }, { - "epoch": 0.59, - "grad_norm": 2.118810061041824, - "learning_rate": 3.87054956056807e-06, - "loss": 0.6611, + "epoch": 0.39, + "grad_norm": 1.6170714665816175, + "learning_rate": 6.898410708096775e-06, + "loss": 0.5883, "step": 5563 }, { - "epoch": 0.59, - "grad_norm": 3.2188855308440183, - "learning_rate": 3.868889437825724e-06, - "loss": 0.6555, + "epoch": 0.39, + "grad_norm": 1.6378017042914796, + "learning_rate": 6.897347544064401e-06, + "loss": 0.5488, "step": 5564 }, { - "epoch": 0.59, - "grad_norm": 2.4614632914433274, - "learning_rate": 3.867229446492533e-06, - "loss": 0.6215, + "epoch": 0.39, + "grad_norm": 1.5170212737110325, + "learning_rate": 6.896284279807702e-06, + "loss": 0.5679, "step": 5565 }, { - "epoch": 0.59, - "grad_norm": 2.6768898443750375, - "learning_rate": 3.865569586761352e-06, - "loss": 0.6264, + "epoch": 0.39, + "grad_norm": 1.622743572919213, + "learning_rate": 6.89522091538284e-06, + "loss": 0.5785, "step": 5566 }, { - "epoch": 0.59, - "grad_norm": 2.4319918129394216, - "learning_rate": 3.863909858825016e-06, - "loss": 0.6583, + "epoch": 0.4, + "grad_norm": 1.983217406395213, + "learning_rate": 6.894157450845988e-06, + "loss": 0.4835, "step": 5567 }, { - "epoch": 0.59, - "grad_norm": 3.70131547101183, - "learning_rate": 3.86225026287635e-06, - "loss": 0.5622, + "epoch": 0.4, + "grad_norm": 1.6005597958924112, + "learning_rate": 6.8930938862533215e-06, + "loss": 0.5587, "step": 5568 }, { - "epoch": 0.59, - "grad_norm": 2.5863671241991466, - "learning_rate": 3.860590799108157e-06, - "loss": 0.6492, + "epoch": 0.4, + "grad_norm": 1.8524016956666391, + "learning_rate": 6.892030221661022e-06, + "loss": 0.5351, "step": 5569 }, { - "epoch": 0.59, - "grad_norm": 2.603038839665001, - "learning_rate": 3.858931467713233e-06, - "loss": 0.7282, + "epoch": 0.4, + "grad_norm": 1.5390459725289387, + "learning_rate": 6.890966457125272e-06, + "loss": 0.5531, "step": 5570 }, { - "epoch": 0.59, - "grad_norm": 3.2162140609274386, - "learning_rate": 3.857272268884353e-06, - "loss": 0.5941, + "epoch": 0.4, + "grad_norm": 1.7904894277321897, + "learning_rate": 6.889902592702269e-06, + "loss": 0.5465, "step": 5571 }, { - "epoch": 0.59, - "grad_norm": 2.2617877144746505, - "learning_rate": 3.855613202814277e-06, - "loss": 0.6509, + "epoch": 0.4, + "grad_norm": 1.8963662840452666, + "learning_rate": 6.888838628448207e-06, + "loss": 0.5948, "step": 5572 }, { - "epoch": 0.59, - "grad_norm": 2.560892583789322, - "learning_rate": 3.853954269695749e-06, - "loss": 0.6604, + "epoch": 0.4, + "grad_norm": 1.992793126939426, + "learning_rate": 6.887774564419288e-06, + "loss": 0.4901, "step": 5573 }, { - "epoch": 0.59, - "grad_norm": 2.487420247946007, - "learning_rate": 3.8522954697215034e-06, - "loss": 0.6302, + "epoch": 0.4, + "grad_norm": 1.3974349691706454, + "learning_rate": 6.886710400671719e-06, + "loss": 0.4968, "step": 5574 }, { - "epoch": 0.59, - "grad_norm": 2.821258946431581, - "learning_rate": 3.8506368030842525e-06, - "loss": 0.6311, + "epoch": 0.4, + "grad_norm": 1.7453139173229686, + "learning_rate": 6.885646137261714e-06, + "loss": 0.6097, "step": 5575 }, { - "epoch": 0.59, - "grad_norm": 2.3313444270659462, - "learning_rate": 3.848978269976694e-06, - "loss": 0.5945, + "epoch": 0.4, + "grad_norm": 1.8266398962139143, + "learning_rate": 6.884581774245492e-06, + "loss": 0.5843, "step": 5576 }, { - "epoch": 0.59, - "grad_norm": 2.469073388544526, - "learning_rate": 3.8473198705915135e-06, - "loss": 0.7422, + "epoch": 0.4, + "grad_norm": 2.156518215294243, + "learning_rate": 6.883517311679273e-06, + "loss": 0.5277, "step": 5577 }, { - "epoch": 0.59, - "grad_norm": 5.193626538455637, - "learning_rate": 3.845661605121377e-06, - "loss": 0.6453, + "epoch": 0.4, + "grad_norm": 1.8474835020724458, + "learning_rate": 6.882452749619288e-06, + "loss": 0.4614, "step": 5578 }, { - "epoch": 0.59, - "grad_norm": 2.5193329581868897, - "learning_rate": 3.844003473758941e-06, - "loss": 0.6308, + "epoch": 0.4, + "grad_norm": 2.071261807309986, + "learning_rate": 6.88138808812177e-06, + "loss": 0.5083, "step": 5579 }, { - "epoch": 0.59, - "grad_norm": 2.157883246431803, - "learning_rate": 3.8423454766968394e-06, - "loss": 0.6685, + "epoch": 0.4, + "grad_norm": 1.413079121466319, + "learning_rate": 6.880323327242959e-06, + "loss": 0.5769, "step": 5580 }, { - "epoch": 0.59, - "grad_norm": 2.718500163349186, - "learning_rate": 3.8406876141276924e-06, - "loss": 0.5817, + "epoch": 0.4, + "grad_norm": 1.578091386227666, + "learning_rate": 6.879258467039098e-06, + "loss": 0.4993, "step": 5581 }, { - "epoch": 0.59, - "grad_norm": 2.3243517458977667, - "learning_rate": 3.8390298862441075e-06, - "loss": 0.7218, + "epoch": 0.4, + "grad_norm": 1.625801394165498, + "learning_rate": 6.878193507566437e-06, + "loss": 0.6408, "step": 5582 }, { - "epoch": 0.59, - "grad_norm": 2.612911018329449, - "learning_rate": 3.8373722932386745e-06, - "loss": 0.6655, + "epoch": 0.4, + "grad_norm": 1.651972448241917, + "learning_rate": 6.877128448881231e-06, + "loss": 0.6023, "step": 5583 }, { - "epoch": 0.59, - "grad_norm": 3.2278773536072167, - "learning_rate": 3.835714835303969e-06, - "loss": 0.6834, + "epoch": 0.4, + "grad_norm": 1.5703094719923367, + "learning_rate": 6.876063291039739e-06, + "loss": 0.5529, "step": 5584 }, { - "epoch": 0.59, - "grad_norm": 2.6365356341700603, - "learning_rate": 3.834057512632546e-06, - "loss": 0.6425, + "epoch": 0.4, + "grad_norm": 1.572250270925388, + "learning_rate": 6.874998034098226e-06, + "loss": 0.4998, "step": 5585 }, { - "epoch": 0.59, - "grad_norm": 2.4664103802063475, - "learning_rate": 3.832400325416952e-06, - "loss": 0.5572, + "epoch": 0.4, + "grad_norm": 2.0839674689804246, + "learning_rate": 6.873932678112966e-06, + "loss": 0.5403, "step": 5586 }, { - "epoch": 0.59, - "grad_norm": 2.0773321625767425, - "learning_rate": 3.830743273849713e-06, - "loss": 0.6554, + "epoch": 0.4, + "grad_norm": 1.5214643438316249, + "learning_rate": 6.872867223140231e-06, + "loss": 0.5098, "step": 5587 }, { - "epoch": 0.59, - "grad_norm": 4.630335903459665, - "learning_rate": 3.829086358123339e-06, - "loss": 0.5597, + "epoch": 0.4, + "grad_norm": 1.382129523709888, + "learning_rate": 6.871801669236304e-06, + "loss": 0.5425, "step": 5588 }, { - "epoch": 0.59, - "grad_norm": 2.038054933842016, - "learning_rate": 3.82742957843033e-06, - "loss": 0.5566, + "epoch": 0.4, + "grad_norm": 1.7010059114305394, + "learning_rate": 6.8707360164574684e-06, + "loss": 0.545, "step": 5589 }, { - "epoch": 0.59, - "grad_norm": 7.81111225996943, - "learning_rate": 3.825772934963161e-06, - "loss": 0.7147, + "epoch": 0.4, + "grad_norm": 1.8509185231245286, + "learning_rate": 6.869670264860018e-06, + "loss": 0.4941, "step": 5590 }, { - "epoch": 0.59, - "grad_norm": 2.130623296542242, - "learning_rate": 3.824116427914298e-06, - "loss": 0.5917, + "epoch": 0.4, + "grad_norm": 1.722483900111449, + "learning_rate": 6.86860441450025e-06, + "loss": 0.5756, "step": 5591 }, { - "epoch": 0.59, - "grad_norm": 2.22553668110589, - "learning_rate": 3.82246005747619e-06, - "loss": 0.5317, + "epoch": 0.4, + "grad_norm": 1.7723268456838808, + "learning_rate": 6.867538465434464e-06, + "loss": 0.5609, "step": 5592 }, { - "epoch": 0.59, - "grad_norm": 2.470012398893969, - "learning_rate": 3.82080382384127e-06, - "loss": 0.5783, + "epoch": 0.4, + "grad_norm": 1.396617276758096, + "learning_rate": 6.866472417718969e-06, + "loss": 0.5182, "step": 5593 }, { - "epoch": 0.59, - "grad_norm": 2.112233161530673, - "learning_rate": 3.819147727201951e-06, - "loss": 0.625, + "epoch": 0.4, + "grad_norm": 1.5708317461495733, + "learning_rate": 6.8654062714100765e-06, + "loss": 0.5494, "step": 5594 }, { - "epoch": 0.59, - "grad_norm": 3.4003860684934244, - "learning_rate": 3.817491767750635e-06, - "loss": 0.656, + "epoch": 0.4, + "grad_norm": 1.4556654353668395, + "learning_rate": 6.864340026564103e-06, + "loss": 0.493, "step": 5595 }, { - "epoch": 0.59, - "grad_norm": 2.6648181797893433, - "learning_rate": 3.815835945679709e-06, - "loss": 0.6308, + "epoch": 0.4, + "grad_norm": 1.6370897174494747, + "learning_rate": 6.863273683237373e-06, + "loss": 0.5378, "step": 5596 }, { - "epoch": 0.59, - "grad_norm": 4.488479873290116, - "learning_rate": 3.81418026118154e-06, - "loss": 0.5743, + "epoch": 0.4, + "grad_norm": 3.4664310731447485, + "learning_rate": 6.862207241486214e-06, + "loss": 0.5539, "step": 5597 }, { - "epoch": 0.59, - "grad_norm": 2.520362514666106, - "learning_rate": 3.8125247144484777e-06, - "loss": 0.5858, + "epoch": 0.4, + "grad_norm": 1.3978659302217062, + "learning_rate": 6.861140701366957e-06, + "loss": 0.4836, "step": 5598 }, { - "epoch": 0.59, - "grad_norm": 2.938146052846746, - "learning_rate": 3.8108693056728636e-06, - "loss": 0.6713, + "epoch": 0.4, + "grad_norm": 1.9541324066520893, + "learning_rate": 6.860074062935942e-06, + "loss": 0.5382, "step": 5599 }, { - "epoch": 0.59, - "grad_norm": 3.143207176889715, - "learning_rate": 3.809214035047016e-06, - "loss": 0.6833, + "epoch": 0.4, + "grad_norm": 1.6012210103020015, + "learning_rate": 6.859007326249512e-06, + "loss": 0.511, "step": 5600 }, { - "epoch": 0.59, - "grad_norm": 2.205971327946951, - "learning_rate": 3.8075589027632376e-06, - "loss": 0.6633, + "epoch": 0.4, + "grad_norm": 1.957469413347741, + "learning_rate": 6.857940491364017e-06, + "loss": 0.5637, "step": 5601 }, { - "epoch": 0.59, - "grad_norm": 2.2680778570668756, - "learning_rate": 3.805903909013822e-06, - "loss": 0.5748, + "epoch": 0.4, + "grad_norm": 2.1826469063384217, + "learning_rate": 6.856873558335808e-06, + "loss": 0.5119, "step": 5602 }, { - "epoch": 0.59, - "grad_norm": 2.501171349333083, - "learning_rate": 3.804249053991037e-06, - "loss": 0.687, + "epoch": 0.4, + "grad_norm": 1.6498085167653496, + "learning_rate": 6.8558065272212485e-06, + "loss": 0.5134, "step": 5603 }, { - "epoch": 0.59, - "grad_norm": 4.102343060157223, - "learning_rate": 3.8025943378871394e-06, - "loss": 0.6342, + "epoch": 0.4, + "grad_norm": 1.958900426203835, + "learning_rate": 6.854739398076698e-06, + "loss": 0.6051, "step": 5604 }, { - "epoch": 0.59, - "grad_norm": 2.894149017249174, - "learning_rate": 3.800939760894371e-06, - "loss": 0.5931, + "epoch": 0.4, + "grad_norm": 1.5899522703933686, + "learning_rate": 6.853672170958526e-06, + "loss": 0.5082, "step": 5605 }, { - "epoch": 0.59, - "grad_norm": 2.1805773899796685, - "learning_rate": 3.7992853232049566e-06, - "loss": 0.5769, + "epoch": 0.4, + "grad_norm": 1.8109379055957895, + "learning_rate": 6.852604845923111e-06, + "loss": 0.5774, "step": 5606 }, { - "epoch": 0.59, - "grad_norm": 2.4362136260737426, - "learning_rate": 3.7976310250111013e-06, - "loss": 0.6543, + "epoch": 0.4, + "grad_norm": 1.3901996958422966, + "learning_rate": 6.851537423026831e-06, + "loss": 0.5777, "step": 5607 }, { - "epoch": 0.59, - "grad_norm": 2.9225984516059653, - "learning_rate": 3.7959768665049967e-06, - "loss": 0.6982, + "epoch": 0.4, + "grad_norm": 1.710336638828316, + "learning_rate": 6.850469902326069e-06, + "loss": 0.5847, "step": 5608 }, { - "epoch": 0.59, - "grad_norm": 2.86310538278994, - "learning_rate": 3.7943228478788198e-06, - "loss": 0.5438, + "epoch": 0.4, + "grad_norm": 1.772652010949035, + "learning_rate": 6.849402283877218e-06, + "loss": 0.5742, "step": 5609 }, { - "epoch": 0.59, - "grad_norm": 2.2140763934345613, - "learning_rate": 3.792668969324731e-06, - "loss": 0.5698, + "epoch": 0.4, + "grad_norm": 1.750357669101453, + "learning_rate": 6.848334567736671e-06, + "loss": 0.5656, "step": 5610 }, { - "epoch": 0.59, - "grad_norm": 2.5016813794002126, - "learning_rate": 3.7910152310348686e-06, - "loss": 0.7066, + "epoch": 0.4, + "grad_norm": 0.8027268503190242, + "learning_rate": 6.84726675396083e-06, + "loss": 0.4726, "step": 5611 }, { - "epoch": 0.59, - "grad_norm": 2.1455037865154742, - "learning_rate": 3.789361633201363e-06, - "loss": 0.6227, + "epoch": 0.4, + "grad_norm": 1.9940014535981887, + "learning_rate": 6.846198842606097e-06, + "loss": 0.5843, "step": 5612 }, { - "epoch": 0.59, - "grad_norm": 2.0200967442200817, - "learning_rate": 3.7877081760163225e-06, - "loss": 0.6153, + "epoch": 0.4, + "grad_norm": 2.211991474169648, + "learning_rate": 6.8451308337288865e-06, + "loss": 0.4784, "step": 5613 }, { - "epoch": 0.59, - "grad_norm": 2.386311762553055, - "learning_rate": 3.7860548596718427e-06, - "loss": 0.6632, + "epoch": 0.4, + "grad_norm": 1.4503197477331375, + "learning_rate": 6.8440627273856145e-06, + "loss": 0.6183, "step": 5614 }, { - "epoch": 0.59, - "grad_norm": 2.264330176961888, - "learning_rate": 3.784401684360001e-06, - "loss": 0.6568, + "epoch": 0.4, + "grad_norm": 1.6703423423881074, + "learning_rate": 6.8429945236327e-06, + "loss": 0.5578, "step": 5615 }, { - "epoch": 0.59, - "grad_norm": 2.761814617242924, - "learning_rate": 3.782748650272857e-06, - "loss": 0.6565, + "epoch": 0.4, + "grad_norm": 1.7677882721339337, + "learning_rate": 6.84192622252657e-06, + "loss": 0.595, "step": 5616 }, { - "epoch": 0.59, - "grad_norm": 2.076122600652747, - "learning_rate": 3.781095757602455e-06, - "loss": 0.6303, + "epoch": 0.4, + "grad_norm": 0.7995572265132935, + "learning_rate": 6.840857824123655e-06, + "loss": 0.4509, "step": 5617 }, { - "epoch": 0.59, - "grad_norm": 2.3150873597883947, - "learning_rate": 3.779443006540825e-06, - "loss": 0.6462, + "epoch": 0.4, + "grad_norm": 1.4477045485234898, + "learning_rate": 6.839789328480394e-06, + "loss": 0.5098, "step": 5618 }, { - "epoch": 0.59, - "grad_norm": 2.940963699254998, - "learning_rate": 3.7777903972799794e-06, - "loss": 0.564, + "epoch": 0.4, + "grad_norm": 1.4135206808136151, + "learning_rate": 6.838720735653225e-06, + "loss": 0.4989, "step": 5619 }, { - "epoch": 0.59, - "grad_norm": 2.4027103997644392, - "learning_rate": 3.7761379300119104e-06, - "loss": 0.61, + "epoch": 0.4, + "grad_norm": 1.5668820329403335, + "learning_rate": 6.837652045698595e-06, + "loss": 0.4968, "step": 5620 }, { - "epoch": 0.59, - "grad_norm": 2.2952639954909526, - "learning_rate": 3.7744856049286e-06, - "loss": 0.5822, + "epoch": 0.4, + "grad_norm": 1.6893947263309117, + "learning_rate": 6.836583258672958e-06, + "loss": 0.5485, "step": 5621 }, { - "epoch": 0.59, - "grad_norm": 3.0792638125829637, - "learning_rate": 3.77283342222201e-06, - "loss": 0.6502, + "epoch": 0.4, + "grad_norm": 1.69854202876631, + "learning_rate": 6.8355143746327724e-06, + "loss": 0.4553, "step": 5622 }, { - "epoch": 0.59, - "grad_norm": 0.9784622364316484, - "learning_rate": 3.7711813820840854e-06, - "loss": 0.61, + "epoch": 0.4, + "grad_norm": 1.6280083274578712, + "learning_rate": 6.834445393634495e-06, + "loss": 0.4861, "step": 5623 }, { - "epoch": 0.59, - "grad_norm": 3.0398501089056658, - "learning_rate": 3.7695294847067544e-06, - "loss": 0.641, + "epoch": 0.4, + "grad_norm": 2.024542693167338, + "learning_rate": 6.833376315734599e-06, + "loss": 0.5782, "step": 5624 }, { - "epoch": 0.59, - "grad_norm": 3.801664578402942, - "learning_rate": 3.7678777302819314e-06, - "loss": 0.5783, + "epoch": 0.4, + "grad_norm": 1.4717995400132047, + "learning_rate": 6.832307140989551e-06, + "loss": 0.5364, "step": 5625 }, { - "epoch": 0.59, - "grad_norm": 2.4911044008283074, - "learning_rate": 3.7662261190015116e-06, - "loss": 0.7241, + "epoch": 0.4, + "grad_norm": 1.5976753478418442, + "learning_rate": 6.831237869455833e-06, + "loss": 0.5397, "step": 5626 }, { - "epoch": 0.59, - "grad_norm": 2.3853448216389594, - "learning_rate": 3.7645746510573754e-06, - "loss": 0.6626, + "epoch": 0.4, + "grad_norm": 1.710974527552355, + "learning_rate": 6.830168501189924e-06, + "loss": 0.6024, "step": 5627 }, { - "epoch": 0.59, - "grad_norm": 2.131535326971549, - "learning_rate": 3.7629233266413866e-06, - "loss": 0.64, + "epoch": 0.4, + "grad_norm": 1.5456147954508466, + "learning_rate": 6.829099036248313e-06, + "loss": 0.551, "step": 5628 }, { - "epoch": 0.59, - "grad_norm": 1.9389020030909176, - "learning_rate": 3.7612721459453883e-06, - "loss": 0.5731, + "epoch": 0.4, + "grad_norm": 1.6950299355321186, + "learning_rate": 6.828029474687494e-06, + "loss": 0.5807, "step": 5629 }, { - "epoch": 0.59, - "grad_norm": 2.7087185491547947, - "learning_rate": 3.75962110916121e-06, - "loss": 0.582, + "epoch": 0.4, + "grad_norm": 1.6615545251474442, + "learning_rate": 6.826959816563964e-06, + "loss": 0.5709, "step": 5630 }, { - "epoch": 0.59, - "grad_norm": 2.475217811511566, - "learning_rate": 3.757970216480667e-06, - "loss": 0.6372, + "epoch": 0.4, + "grad_norm": 7.472720454767171, + "learning_rate": 6.825890061934226e-06, + "loss": 0.5079, "step": 5631 }, { - "epoch": 0.59, - "grad_norm": 2.4037718481514765, - "learning_rate": 3.756319468095555e-06, - "loss": 0.6331, + "epoch": 0.4, + "grad_norm": 1.4868501281574187, + "learning_rate": 6.824820210854788e-06, + "loss": 0.56, "step": 5632 }, { - "epoch": 0.59, - "grad_norm": 3.020609700875131, - "learning_rate": 3.7546688641976496e-06, - "loss": 0.6931, + "epoch": 0.4, + "grad_norm": 1.561822855805966, + "learning_rate": 6.823750263382164e-06, + "loss": 0.5152, "step": 5633 }, { - "epoch": 0.59, - "grad_norm": 4.592025998510074, - "learning_rate": 3.753018404978717e-06, - "loss": 0.5209, + "epoch": 0.4, + "grad_norm": 0.834757427001846, + "learning_rate": 6.822680219572869e-06, + "loss": 0.4673, "step": 5634 }, { - "epoch": 0.59, - "grad_norm": 2.501978181917583, - "learning_rate": 3.7513680906305015e-06, - "loss": 0.5572, + "epoch": 0.4, + "grad_norm": 2.038292723489558, + "learning_rate": 6.82161007948343e-06, + "loss": 0.5605, "step": 5635 }, { - "epoch": 0.59, - "grad_norm": 2.965457135634429, - "learning_rate": 3.7497179213447305e-06, - "loss": 0.6254, + "epoch": 0.4, + "grad_norm": 1.7401537070849864, + "learning_rate": 6.820539843170374e-06, + "loss": 0.4735, "step": 5636 }, { - "epoch": 0.59, - "grad_norm": 2.294791076548485, - "learning_rate": 3.7480678973131198e-06, - "loss": 0.6553, + "epoch": 0.4, + "grad_norm": 1.7129843580890953, + "learning_rate": 6.8194695106902355e-06, + "loss": 0.5399, "step": 5637 }, { - "epoch": 0.59, - "grad_norm": 2.4361429134308956, - "learning_rate": 3.74641801872736e-06, - "loss": 0.645, + "epoch": 0.4, + "grad_norm": 2.9274250569026057, + "learning_rate": 6.8183990820995514e-06, + "loss": 0.6126, "step": 5638 }, { - "epoch": 0.59, - "grad_norm": 2.229172173470131, - "learning_rate": 3.7447682857791307e-06, - "loss": 0.6655, + "epoch": 0.4, + "grad_norm": 1.5795876338278925, + "learning_rate": 6.817328557454868e-06, + "loss": 0.5424, "step": 5639 }, { - "epoch": 0.59, - "grad_norm": 2.5927694332695186, - "learning_rate": 3.743118698660094e-06, - "loss": 0.5953, + "epoch": 0.4, + "grad_norm": 1.727461184021476, + "learning_rate": 6.816257936812729e-06, + "loss": 0.4931, "step": 5640 }, { - "epoch": 0.59, - "grad_norm": 2.419854503906456, - "learning_rate": 3.741469257561895e-06, - "loss": 0.6415, + "epoch": 0.4, + "grad_norm": 1.5132400771075096, + "learning_rate": 6.8151872202296935e-06, + "loss": 0.5314, "step": 5641 }, { - "epoch": 0.59, - "grad_norm": 2.454076430219235, - "learning_rate": 3.739819962676159e-06, - "loss": 0.677, + "epoch": 0.4, + "grad_norm": 1.5956439730358498, + "learning_rate": 6.8141164077623165e-06, + "loss": 0.5094, "step": 5642 }, { - "epoch": 0.59, - "grad_norm": 2.333031240171135, - "learning_rate": 3.738170814194495e-06, - "loss": 0.5559, + "epoch": 0.4, + "grad_norm": 1.9182576340802884, + "learning_rate": 6.813045499467167e-06, + "loss": 0.5434, "step": 5643 }, { - "epoch": 0.59, - "grad_norm": 2.427282486291471, - "learning_rate": 3.7365218123084996e-06, - "loss": 0.6336, + "epoch": 0.4, + "grad_norm": 2.40139825165419, + "learning_rate": 6.811974495400808e-06, + "loss": 0.6132, "step": 5644 }, { - "epoch": 0.59, - "grad_norm": 2.692927524150578, - "learning_rate": 3.7348729572097487e-06, - "loss": 0.5909, + "epoch": 0.4, + "grad_norm": 1.5540229804362857, + "learning_rate": 6.810903395619816e-06, + "loss": 0.5442, "step": 5645 }, { - "epoch": 0.59, - "grad_norm": 2.406995986734407, - "learning_rate": 3.7332242490897985e-06, - "loss": 0.6221, + "epoch": 0.4, + "grad_norm": 1.6818075802953034, + "learning_rate": 6.809832200180774e-06, + "loss": 0.5657, "step": 5646 }, { - "epoch": 0.59, - "grad_norm": 2.735312603120759, - "learning_rate": 3.7315756881401944e-06, - "loss": 0.6724, + "epoch": 0.4, + "grad_norm": 1.89080822850704, + "learning_rate": 6.80876090914026e-06, + "loss": 0.5161, "step": 5647 }, { - "epoch": 0.59, - "grad_norm": 3.4788360617314944, - "learning_rate": 3.7299272745524583e-06, - "loss": 0.634, + "epoch": 0.4, + "grad_norm": 1.6960860794238837, + "learning_rate": 6.807689522554867e-06, + "loss": 0.5928, "step": 5648 }, { - "epoch": 0.59, - "grad_norm": 2.280769795287397, - "learning_rate": 3.728279008518102e-06, - "loss": 0.5864, + "epoch": 0.4, + "grad_norm": 2.459358962203065, + "learning_rate": 6.806618040481186e-06, + "loss": 0.5379, "step": 5649 }, { - "epoch": 0.59, - "grad_norm": 2.7064070384851253, - "learning_rate": 3.726630890228615e-06, - "loss": 0.6402, + "epoch": 0.4, + "grad_norm": 1.6556605118514494, + "learning_rate": 6.805546462975821e-06, + "loss": 0.5304, "step": 5650 }, { - "epoch": 0.59, - "grad_norm": 3.801131241287733, - "learning_rate": 3.7249829198754694e-06, - "loss": 0.6281, + "epoch": 0.4, + "grad_norm": 0.7143538680416827, + "learning_rate": 6.804474790095373e-06, + "loss": 0.4345, "step": 5651 }, { - "epoch": 0.59, - "grad_norm": 2.172639276357326, - "learning_rate": 3.7233350976501217e-06, - "loss": 0.6297, + "epoch": 0.4, + "grad_norm": 1.4176980928079508, + "learning_rate": 6.803403021896451e-06, + "loss": 0.4786, "step": 5652 }, { - "epoch": 0.59, - "grad_norm": 1.9375269933216765, - "learning_rate": 3.7216874237440127e-06, - "loss": 0.5696, + "epoch": 0.4, + "grad_norm": 0.8445155104414335, + "learning_rate": 6.802331158435671e-06, + "loss": 0.4549, "step": 5653 }, { - "epoch": 0.59, - "grad_norm": 2.3229567583419217, - "learning_rate": 3.7200398983485643e-06, - "loss": 0.6104, + "epoch": 0.4, + "grad_norm": 3.512228846867844, + "learning_rate": 6.801259199769654e-06, + "loss": 0.5791, "step": 5654 }, { - "epoch": 0.6, - "grad_norm": 2.523904866991665, - "learning_rate": 3.7183925216551784e-06, - "loss": 0.6879, + "epoch": 0.4, + "grad_norm": 1.4877928966756284, + "learning_rate": 6.80018714595502e-06, + "loss": 0.572, "step": 5655 }, { - "epoch": 0.6, - "grad_norm": 2.1913564898938622, - "learning_rate": 3.716745293855246e-06, - "loss": 0.5874, + "epoch": 0.4, + "grad_norm": 1.8087461190308793, + "learning_rate": 6.799114997048402e-06, + "loss": 0.5811, "step": 5656 }, { - "epoch": 0.6, - "grad_norm": 2.2827810373182724, - "learning_rate": 3.715098215140136e-06, - "loss": 0.6078, + "epoch": 0.4, + "grad_norm": 1.6387938712991292, + "learning_rate": 6.7980427531064334e-06, + "loss": 0.502, "step": 5657 }, { - "epoch": 0.6, - "grad_norm": 5.128545296474594, - "learning_rate": 3.7134512857012017e-06, - "loss": 0.6683, + "epoch": 0.4, + "grad_norm": 0.827290834218492, + "learning_rate": 6.796970414185755e-06, + "loss": 0.4548, "step": 5658 }, { - "epoch": 0.6, - "grad_norm": 2.9857064043435133, - "learning_rate": 3.711804505729776e-06, - "loss": 0.5813, + "epoch": 0.4, + "grad_norm": 1.6467961269597464, + "learning_rate": 6.795897980343009e-06, + "loss": 0.4916, "step": 5659 }, { - "epoch": 0.6, - "grad_norm": 2.632396638894393, - "learning_rate": 3.7101578754171797e-06, - "loss": 0.6517, + "epoch": 0.4, + "grad_norm": 1.5919611647926077, + "learning_rate": 6.794825451634848e-06, + "loss": 0.5503, "step": 5660 }, { - "epoch": 0.6, - "grad_norm": 2.2170898683792815, - "learning_rate": 3.7085113949547126e-06, - "loss": 0.6747, + "epoch": 0.4, + "grad_norm": 2.444908573300306, + "learning_rate": 6.793752828117924e-06, + "loss": 0.5125, "step": 5661 }, { - "epoch": 0.6, - "grad_norm": 2.1974382543005357, - "learning_rate": 3.706865064533659e-06, - "loss": 0.6875, + "epoch": 0.4, + "grad_norm": 1.628727627656973, + "learning_rate": 6.792680109848897e-06, + "loss": 0.5138, "step": 5662 }, { - "epoch": 0.6, - "grad_norm": 3.3819380364992115, - "learning_rate": 3.7052188843452854e-06, - "loss": 0.622, + "epoch": 0.4, + "grad_norm": 0.8305852345271679, + "learning_rate": 6.791607296884433e-06, + "loss": 0.4642, "step": 5663 }, { - "epoch": 0.6, - "grad_norm": 2.682752126582236, - "learning_rate": 3.7035728545808367e-06, - "loss": 0.6522, + "epoch": 0.4, + "grad_norm": 1.6246094028930107, + "learning_rate": 6.790534389281201e-06, + "loss": 0.6007, "step": 5664 }, { - "epoch": 0.6, - "grad_norm": 2.042183074281712, - "learning_rate": 3.701926975431547e-06, - "loss": 0.6193, + "epoch": 0.4, + "grad_norm": 1.5118590842739732, + "learning_rate": 6.7894613870958746e-06, + "loss": 0.5545, "step": 5665 }, { - "epoch": 0.6, - "grad_norm": 1.0167480891393266, - "learning_rate": 3.700281247088629e-06, - "loss": 0.5478, + "epoch": 0.4, + "grad_norm": 2.0338614362699827, + "learning_rate": 6.788388290385135e-06, + "loss": 0.5998, "step": 5666 }, { - "epoch": 0.6, - "grad_norm": 3.190095939887631, - "learning_rate": 3.6986356697432796e-06, - "loss": 0.6033, + "epoch": 0.4, + "grad_norm": 1.555367359286219, + "learning_rate": 6.7873150992056656e-06, + "loss": 0.5793, "step": 5667 }, { - "epoch": 0.6, - "grad_norm": 2.3881960936207682, - "learning_rate": 3.6969902435866743e-06, - "loss": 0.5799, + "epoch": 0.4, + "grad_norm": 1.8047183803600875, + "learning_rate": 6.786241813614156e-06, + "loss": 0.4846, "step": 5668 }, { - "epoch": 0.6, - "grad_norm": 2.4289590644832786, - "learning_rate": 3.6953449688099774e-06, - "loss": 0.7183, + "epoch": 0.4, + "grad_norm": 1.8413359563596212, + "learning_rate": 6.785168433667302e-06, + "loss": 0.5069, "step": 5669 }, { - "epoch": 0.6, - "grad_norm": 2.15648426463186, - "learning_rate": 3.69369984560433e-06, - "loss": 0.6219, + "epoch": 0.4, + "grad_norm": 1.550149455290965, + "learning_rate": 6.784094959421802e-06, + "loss": 0.5626, "step": 5670 }, { - "epoch": 0.6, - "grad_norm": 2.679139642427304, - "learning_rate": 3.69205487416086e-06, - "loss": 0.6284, + "epoch": 0.4, + "grad_norm": 1.4433808141021034, + "learning_rate": 6.783021390934361e-06, + "loss": 0.4468, "step": 5671 }, { - "epoch": 0.6, - "grad_norm": 3.199487911029984, - "learning_rate": 3.690410054670671e-06, - "loss": 0.6225, + "epoch": 0.4, + "grad_norm": 0.7575777218678642, + "learning_rate": 6.781947728261687e-06, + "loss": 0.4616, "step": 5672 }, { - "epoch": 0.6, - "grad_norm": 2.927509209876508, - "learning_rate": 3.6887653873248575e-06, - "loss": 0.6316, + "epoch": 0.4, + "grad_norm": 1.5836416875934447, + "learning_rate": 6.780873971460499e-06, + "loss": 0.5076, "step": 5673 }, { - "epoch": 0.6, - "grad_norm": 2.9002460059852595, - "learning_rate": 3.6871208723144903e-06, - "loss": 0.6826, + "epoch": 0.4, + "grad_norm": 1.718955560214106, + "learning_rate": 6.779800120587511e-06, + "loss": 0.5346, "step": 5674 }, { - "epoch": 0.6, - "grad_norm": 2.2930525319126507, - "learning_rate": 3.6854765098306254e-06, - "loss": 0.5643, + "epoch": 0.4, + "grad_norm": 1.7507806051631802, + "learning_rate": 6.778726175699451e-06, + "loss": 0.5368, "step": 5675 }, { - "epoch": 0.6, - "grad_norm": 2.809686719399776, - "learning_rate": 3.6838323000643013e-06, - "loss": 0.6805, + "epoch": 0.4, + "grad_norm": 2.6249175609672846, + "learning_rate": 6.7776521368530455e-06, + "loss": 0.5365, "step": 5676 }, { - "epoch": 0.6, - "grad_norm": 2.6784212261701166, - "learning_rate": 3.682188243206535e-06, - "loss": 0.6072, + "epoch": 0.4, + "grad_norm": 1.6974375994840956, + "learning_rate": 6.776578004105032e-06, + "loss": 0.4653, "step": 5677 }, { - "epoch": 0.6, - "grad_norm": 1.1335859555703096, - "learning_rate": 3.6805443394483275e-06, - "loss": 0.5365, + "epoch": 0.4, + "grad_norm": 1.9163559566381556, + "learning_rate": 6.775503777512149e-06, + "loss": 0.5867, "step": 5678 }, { - "epoch": 0.6, - "grad_norm": 2.477277761969307, - "learning_rate": 3.6789005889806656e-06, - "loss": 0.7198, + "epoch": 0.4, + "grad_norm": 1.6165440078703628, + "learning_rate": 6.774429457131139e-06, + "loss": 0.5397, "step": 5679 }, { - "epoch": 0.6, - "grad_norm": 2.1807624837158626, - "learning_rate": 3.6772569919945157e-06, - "loss": 0.6629, + "epoch": 0.4, + "grad_norm": 3.0008870392777984, + "learning_rate": 6.773355043018753e-06, + "loss": 0.5501, "step": 5680 }, { - "epoch": 0.6, - "grad_norm": 2.278161138807236, - "learning_rate": 3.6756135486808227e-06, - "loss": 0.5702, + "epoch": 0.4, + "grad_norm": 1.5696745764839155, + "learning_rate": 6.7722805352317446e-06, + "loss": 0.5793, "step": 5681 }, { - "epoch": 0.6, - "grad_norm": 2.433937775862514, - "learning_rate": 3.6739702592305205e-06, - "loss": 0.6464, + "epoch": 0.4, + "grad_norm": 0.7715710307914138, + "learning_rate": 6.771205933826874e-06, + "loss": 0.4428, "step": 5682 }, { - "epoch": 0.6, - "grad_norm": 3.0575854854067104, - "learning_rate": 3.6723271238345187e-06, - "loss": 0.674, + "epoch": 0.4, + "grad_norm": 0.8051298144770578, + "learning_rate": 6.770131238860903e-06, + "loss": 0.4615, "step": 5683 }, { - "epoch": 0.6, - "grad_norm": 3.607667973961037, - "learning_rate": 3.6706841426837145e-06, - "loss": 0.6982, + "epoch": 0.4, + "grad_norm": 1.573780428461699, + "learning_rate": 6.769056450390603e-06, + "loss": 0.5455, "step": 5684 }, { - "epoch": 0.6, - "grad_norm": 2.3855563143332543, - "learning_rate": 3.669041315968986e-06, - "loss": 0.6425, + "epoch": 0.4, + "grad_norm": 0.8328773318504669, + "learning_rate": 6.7679815684727455e-06, + "loss": 0.4601, "step": 5685 }, { - "epoch": 0.6, - "grad_norm": 2.551518686262234, - "learning_rate": 3.667398643881189e-06, - "loss": 0.6379, + "epoch": 0.4, + "grad_norm": 1.5050988930162001, + "learning_rate": 6.766906593164111e-06, + "loss": 0.5287, "step": 5686 }, { - "epoch": 0.6, - "grad_norm": 2.2344906686707855, - "learning_rate": 3.665756126611164e-06, - "loss": 0.6542, + "epoch": 0.4, + "grad_norm": 1.6102258322150869, + "learning_rate": 6.765831524521484e-06, + "loss": 0.5765, "step": 5687 }, { - "epoch": 0.6, - "grad_norm": 2.892933147579963, - "learning_rate": 3.664113764349736e-06, - "loss": 0.6753, + "epoch": 0.4, + "grad_norm": 1.691210294969553, + "learning_rate": 6.7647563626016525e-06, + "loss": 0.4971, "step": 5688 }, { - "epoch": 0.6, - "grad_norm": 2.899716923671387, - "learning_rate": 3.6624715572877106e-06, - "loss": 0.5615, + "epoch": 0.4, + "grad_norm": 1.4711216302559886, + "learning_rate": 6.76368110746141e-06, + "loss": 0.5253, "step": 5689 }, { - "epoch": 0.6, - "grad_norm": 2.52564711595407, - "learning_rate": 3.6608295056158717e-06, - "loss": 0.6426, + "epoch": 0.4, + "grad_norm": 1.7436965116512015, + "learning_rate": 6.762605759157555e-06, + "loss": 0.537, "step": 5690 }, { - "epoch": 0.6, - "grad_norm": 3.2313531578252412, - "learning_rate": 3.65918760952499e-06, - "loss": 0.5662, + "epoch": 0.4, + "grad_norm": 1.6577416075556308, + "learning_rate": 6.761530317746892e-06, + "loss": 0.604, "step": 5691 }, { - "epoch": 0.6, - "grad_norm": 2.5379514454376904, - "learning_rate": 3.657545869205816e-06, - "loss": 0.5762, + "epoch": 0.4, + "grad_norm": 1.932992900789687, + "learning_rate": 6.7604547832862276e-06, + "loss": 0.5117, "step": 5692 }, { - "epoch": 0.6, - "grad_norm": 2.9538894091444807, - "learning_rate": 3.6559042848490835e-06, - "loss": 0.5906, + "epoch": 0.4, + "grad_norm": 0.8142297446367894, + "learning_rate": 6.759379155832377e-06, + "loss": 0.469, "step": 5693 }, { - "epoch": 0.6, - "grad_norm": 2.6382651155593604, - "learning_rate": 3.654262856645503e-06, - "loss": 0.6264, + "epoch": 0.4, + "grad_norm": 2.145549117706357, + "learning_rate": 6.758303435442156e-06, + "loss": 0.589, "step": 5694 }, { - "epoch": 0.6, - "grad_norm": 3.482444707821686, - "learning_rate": 3.652621584785776e-06, - "loss": 0.6596, + "epoch": 0.4, + "grad_norm": 1.7448035989312431, + "learning_rate": 6.757227622172392e-06, + "loss": 0.5123, "step": 5695 }, { - "epoch": 0.6, - "grad_norm": 2.2566669898698146, - "learning_rate": 3.6509804694605768e-06, - "loss": 0.7118, + "epoch": 0.4, + "grad_norm": 1.6041367449204575, + "learning_rate": 6.7561517160799095e-06, + "loss": 0.5434, "step": 5696 }, { - "epoch": 0.6, - "grad_norm": 3.5669855228140213, - "learning_rate": 3.649339510860568e-06, - "loss": 0.5655, + "epoch": 0.4, + "grad_norm": 1.5159738046616373, + "learning_rate": 6.755075717221544e-06, + "loss": 0.5296, "step": 5697 }, { - "epoch": 0.6, - "grad_norm": 2.3661369567618866, - "learning_rate": 3.647698709176391e-06, - "loss": 0.6338, + "epoch": 0.4, + "grad_norm": 1.6545479522505093, + "learning_rate": 6.75399962565413e-06, + "loss": 0.5924, "step": 5698 }, { - "epoch": 0.6, - "grad_norm": 2.3484815033901807, - "learning_rate": 3.6460580645986685e-06, - "loss": 0.6255, + "epoch": 0.4, + "grad_norm": 1.6165168522602136, + "learning_rate": 6.752923441434514e-06, + "loss": 0.501, "step": 5699 }, { - "epoch": 0.6, - "grad_norm": 2.53755595996656, - "learning_rate": 3.6444175773180045e-06, - "loss": 0.5636, + "epoch": 0.4, + "grad_norm": 1.77212819530055, + "learning_rate": 6.751847164619543e-06, + "loss": 0.5602, "step": 5700 }, { - "epoch": 0.6, - "grad_norm": 2.5222433345809763, - "learning_rate": 3.6427772475249896e-06, - "loss": 0.6518, + "epoch": 0.4, + "grad_norm": 1.697554077463077, + "learning_rate": 6.7507707952660665e-06, + "loss": 0.5596, "step": 5701 }, { - "epoch": 0.6, - "grad_norm": 2.586007819868345, - "learning_rate": 3.6411370754101915e-06, - "loss": 0.6029, + "epoch": 0.4, + "grad_norm": 2.2542574726760134, + "learning_rate": 6.749694333430944e-06, + "loss": 0.4851, "step": 5702 }, { - "epoch": 0.6, - "grad_norm": 5.428940429081987, - "learning_rate": 3.639497061164158e-06, - "loss": 0.5528, + "epoch": 0.4, + "grad_norm": 1.6885383612703997, + "learning_rate": 6.748617779171041e-06, + "loss": 0.5111, "step": 5703 }, { - "epoch": 0.6, - "grad_norm": 2.60801073186904, - "learning_rate": 3.637857204977424e-06, - "loss": 0.5935, + "epoch": 0.4, + "grad_norm": 1.574852551048577, + "learning_rate": 6.747541132543218e-06, + "loss": 0.4878, "step": 5704 }, { - "epoch": 0.6, - "grad_norm": 3.8916382563951037, - "learning_rate": 3.636217507040502e-06, - "loss": 0.6435, + "epoch": 0.4, + "grad_norm": 1.5277930125887604, + "learning_rate": 6.746464393604354e-06, + "loss": 0.4939, "step": 5705 }, { - "epoch": 0.6, - "grad_norm": 0.9863942243849351, - "learning_rate": 3.6345779675438897e-06, - "loss": 0.5717, + "epoch": 0.4, + "grad_norm": 2.0976845936789403, + "learning_rate": 6.745387562411321e-06, + "loss": 0.5536, "step": 5706 }, { - "epoch": 0.6, - "grad_norm": 7.47039659299787, - "learning_rate": 3.6329385866780587e-06, - "loss": 0.5755, + "epoch": 0.4, + "grad_norm": 1.6130580734348265, + "learning_rate": 6.744310639021003e-06, + "loss": 0.5271, "step": 5707 }, { - "epoch": 0.6, - "grad_norm": 2.7898417007114333, - "learning_rate": 3.6312993646334727e-06, - "loss": 0.5874, + "epoch": 0.41, + "grad_norm": 2.01339669451809, + "learning_rate": 6.743233623490287e-06, + "loss": 0.5094, "step": 5708 }, { - "epoch": 0.6, - "grad_norm": 2.273917598473412, - "learning_rate": 3.6296603016005693e-06, - "loss": 0.6866, + "epoch": 0.41, + "grad_norm": 0.7905027224675524, + "learning_rate": 6.742156515876064e-06, + "loss": 0.4653, "step": 5709 }, { - "epoch": 0.6, - "grad_norm": 2.6107303675720432, - "learning_rate": 3.6280213977697715e-06, - "loss": 0.6771, + "epoch": 0.41, + "grad_norm": 1.5905269142101468, + "learning_rate": 6.741079316235231e-06, + "loss": 0.4987, "step": 5710 }, { - "epoch": 0.6, - "grad_norm": 2.2956736561042357, - "learning_rate": 3.6263826533314827e-06, - "loss": 0.6167, + "epoch": 0.41, + "grad_norm": 1.6846557278146237, + "learning_rate": 6.740002024624688e-06, + "loss": 0.5437, "step": 5711 }, { - "epoch": 0.6, - "grad_norm": 2.384801633397614, - "learning_rate": 3.624744068476086e-06, - "loss": 0.6664, + "epoch": 0.41, + "grad_norm": 1.5367251246347433, + "learning_rate": 6.738924641101343e-06, + "loss": 0.5246, "step": 5712 }, { - "epoch": 0.6, - "grad_norm": 2.0974504782665337, - "learning_rate": 3.623105643393946e-06, - "loss": 0.6318, + "epoch": 0.41, + "grad_norm": 2.0674745172192943, + "learning_rate": 6.737847165722107e-06, + "loss": 0.5427, "step": 5713 }, { - "epoch": 0.6, - "grad_norm": 3.3889022639258015, - "learning_rate": 3.621467378275414e-06, - "loss": 0.6735, + "epoch": 0.41, + "grad_norm": 0.805614223440847, + "learning_rate": 6.736769598543894e-06, + "loss": 0.4449, "step": 5714 }, { - "epoch": 0.6, - "grad_norm": 4.18791054241475, - "learning_rate": 3.6198292733108177e-06, - "loss": 0.6781, + "epoch": 0.41, + "grad_norm": 1.9063365055437047, + "learning_rate": 6.735691939623626e-06, + "loss": 0.5054, "step": 5715 }, { - "epoch": 0.6, - "grad_norm": 2.7589589062878197, - "learning_rate": 3.6181913286904647e-06, - "loss": 0.5676, + "epoch": 0.41, + "grad_norm": 1.7558614145447742, + "learning_rate": 6.734614189018227e-06, + "loss": 0.5232, "step": 5716 }, { - "epoch": 0.6, - "grad_norm": 1.9737675399727652, - "learning_rate": 3.6165535446046497e-06, - "loss": 0.6197, + "epoch": 0.41, + "grad_norm": 1.5348754071985198, + "learning_rate": 6.733536346784631e-06, + "loss": 0.5174, "step": 5717 }, { - "epoch": 0.6, - "grad_norm": 2.520077841376911, - "learning_rate": 3.6149159212436435e-06, - "loss": 0.5949, + "epoch": 0.41, + "grad_norm": 3.144195997605208, + "learning_rate": 6.73245841297977e-06, + "loss": 0.5206, "step": 5718 }, { - "epoch": 0.6, - "grad_norm": 2.2678271084992514, - "learning_rate": 3.6132784587977053e-06, - "loss": 0.5782, + "epoch": 0.41, + "grad_norm": 2.151952167554706, + "learning_rate": 6.7313803876605855e-06, + "loss": 0.5762, "step": 5719 }, { - "epoch": 0.6, - "grad_norm": 2.736740775923461, - "learning_rate": 3.611641157457064e-06, - "loss": 0.5897, + "epoch": 0.41, + "grad_norm": 1.7179519751478105, + "learning_rate": 6.730302270884023e-06, + "loss": 0.5357, "step": 5720 }, { - "epoch": 0.6, - "grad_norm": 2.503689546593751, - "learning_rate": 3.6100040174119403e-06, - "loss": 0.6358, + "epoch": 0.41, + "grad_norm": 3.949699126821526, + "learning_rate": 6.7292240627070295e-06, + "loss": 0.5702, "step": 5721 }, { - "epoch": 0.6, - "grad_norm": 0.9542516668761488, - "learning_rate": 3.6083670388525316e-06, - "loss": 0.6144, + "epoch": 0.41, + "grad_norm": 1.629099399272092, + "learning_rate": 6.7281457631865625e-06, + "loss": 0.5637, "step": 5722 }, { - "epoch": 0.6, - "grad_norm": 3.151965071313513, - "learning_rate": 3.6067302219690175e-06, - "loss": 0.6571, + "epoch": 0.41, + "grad_norm": 1.6794508241025639, + "learning_rate": 6.727067372379581e-06, + "loss": 0.4718, "step": 5723 }, { - "epoch": 0.6, - "grad_norm": 2.083294772175323, - "learning_rate": 3.6050935669515604e-06, - "loss": 0.6405, + "epoch": 0.41, + "grad_norm": 1.5956770952264652, + "learning_rate": 6.725988890343048e-06, + "loss": 0.5324, "step": 5724 }, { - "epoch": 0.6, - "grad_norm": 2.308319919656163, - "learning_rate": 3.603457073990298e-06, - "loss": 0.5927, + "epoch": 0.41, + "grad_norm": 0.7235190206520445, + "learning_rate": 6.724910317133934e-06, + "loss": 0.4269, "step": 5725 }, { - "epoch": 0.6, - "grad_norm": 2.5585087791382075, - "learning_rate": 3.6018207432753572e-06, - "loss": 0.5503, + "epoch": 0.41, + "grad_norm": 1.4676282942084307, + "learning_rate": 6.723831652809213e-06, + "loss": 0.5089, "step": 5726 }, { - "epoch": 0.6, - "grad_norm": 14.380449615576731, - "learning_rate": 3.60018457499684e-06, - "loss": 0.6287, + "epoch": 0.41, + "grad_norm": 1.4863073203782458, + "learning_rate": 6.722752897425863e-06, + "loss": 0.5181, "step": 5727 }, { - "epoch": 0.6, - "grad_norm": 3.450787029376905, - "learning_rate": 3.598548569344834e-06, - "loss": 0.6453, + "epoch": 0.41, + "grad_norm": 1.8540395135905683, + "learning_rate": 6.7216740510408655e-06, + "loss": 0.6063, "step": 5728 }, { - "epoch": 0.6, - "grad_norm": 3.1421747068388366, - "learning_rate": 3.596912726509402e-06, - "loss": 0.7465, + "epoch": 0.41, + "grad_norm": 1.7691641916086094, + "learning_rate": 6.720595113711212e-06, + "loss": 0.5571, "step": 5729 }, { - "epoch": 0.6, - "grad_norm": 2.304462145687152, - "learning_rate": 3.595277046680594e-06, - "loss": 0.6277, + "epoch": 0.41, + "grad_norm": 1.6282863440208029, + "learning_rate": 6.719516085493894e-06, + "loss": 0.6451, "step": 5730 }, { - "epoch": 0.6, - "grad_norm": 5.179794005325396, - "learning_rate": 3.5936415300484383e-06, - "loss": 0.6019, + "epoch": 0.41, + "grad_norm": 1.8028905669978532, + "learning_rate": 6.718436966445911e-06, + "loss": 0.6336, "step": 5731 }, { - "epoch": 0.6, - "grad_norm": 1.0233482875110236, - "learning_rate": 3.592006176802944e-06, - "loss": 0.5521, + "epoch": 0.41, + "grad_norm": 2.1701400717198314, + "learning_rate": 6.717357756624263e-06, + "loss": 0.6002, "step": 5732 }, { - "epoch": 0.6, - "grad_norm": 2.2805038648763887, - "learning_rate": 3.5903709871341034e-06, - "loss": 0.6422, + "epoch": 0.41, + "grad_norm": 1.6370849216670216, + "learning_rate": 6.7162784560859605e-06, + "loss": 0.488, "step": 5733 }, { - "epoch": 0.6, - "grad_norm": 2.7182569922936426, - "learning_rate": 3.5887359612318862e-06, - "loss": 0.5947, + "epoch": 0.41, + "grad_norm": 1.618042266572347, + "learning_rate": 6.715199064888014e-06, + "loss": 0.5585, "step": 5734 }, { - "epoch": 0.6, - "grad_norm": 2.992203373180557, - "learning_rate": 3.5871010992862436e-06, - "loss": 0.6566, + "epoch": 0.41, + "grad_norm": 1.7973475946349688, + "learning_rate": 6.714119583087442e-06, + "loss": 0.5748, "step": 5735 }, { - "epoch": 0.6, - "grad_norm": 3.1660591900577493, - "learning_rate": 3.5854664014871128e-06, - "loss": 0.6317, + "epoch": 0.41, + "grad_norm": 1.5830779001178317, + "learning_rate": 6.713040010741263e-06, + "loss": 0.5482, "step": 5736 }, { - "epoch": 0.6, - "grad_norm": 2.645200532572237, - "learning_rate": 3.5838318680244067e-06, - "loss": 0.6629, + "epoch": 0.41, + "grad_norm": 0.8175832517242283, + "learning_rate": 6.711960347906506e-06, + "loss": 0.4546, "step": 5737 }, { - "epoch": 0.6, - "grad_norm": 2.2006735106420225, - "learning_rate": 3.582197499088019e-06, - "loss": 0.7078, + "epoch": 0.41, + "grad_norm": 1.925210434822727, + "learning_rate": 6.7108805946402045e-06, + "loss": 0.5221, "step": 5738 }, { - "epoch": 0.6, - "grad_norm": 2.7870416290233466, - "learning_rate": 3.580563294867828e-06, - "loss": 0.6616, + "epoch": 0.41, + "grad_norm": 2.199953464379688, + "learning_rate": 6.709800750999392e-06, + "loss": 0.6205, "step": 5739 }, { - "epoch": 0.6, - "grad_norm": 2.2284086143558057, - "learning_rate": 3.5789292555536907e-06, - "loss": 0.644, + "epoch": 0.41, + "grad_norm": 0.7266441741636346, + "learning_rate": 6.70872081704111e-06, + "loss": 0.43, "step": 5740 }, { - "epoch": 0.6, - "grad_norm": 2.0680596768407415, - "learning_rate": 3.5772953813354455e-06, - "loss": 0.5917, + "epoch": 0.41, + "grad_norm": 1.98567652805149, + "learning_rate": 6.707640792822405e-06, + "loss": 0.5494, "step": 5741 }, { - "epoch": 0.6, - "grad_norm": 2.6380941942532816, - "learning_rate": 3.575661672402908e-06, - "loss": 0.5598, + "epoch": 0.41, + "grad_norm": 1.697844368211131, + "learning_rate": 6.706560678400327e-06, + "loss": 0.5877, "step": 5742 }, { - "epoch": 0.6, - "grad_norm": 2.1088238905647647, - "learning_rate": 3.5740281289458812e-06, - "loss": 0.6548, + "epoch": 0.41, + "grad_norm": 1.7958680893123848, + "learning_rate": 6.705480473831931e-06, + "loss": 0.5337, "step": 5743 }, { - "epoch": 0.6, - "grad_norm": 2.364337376015603, - "learning_rate": 3.5723947511541435e-06, - "loss": 0.6301, + "epoch": 0.41, + "grad_norm": 1.8383336017791638, + "learning_rate": 6.704400179174278e-06, + "loss": 0.5383, "step": 5744 }, { - "epoch": 0.6, - "grad_norm": 2.155417858599172, - "learning_rate": 3.5707615392174576e-06, - "loss": 0.6011, + "epoch": 0.41, + "grad_norm": 1.5536424043182566, + "learning_rate": 6.703319794484431e-06, + "loss": 0.5143, "step": 5745 }, { - "epoch": 0.6, - "grad_norm": 2.789662978157353, - "learning_rate": 3.5691284933255653e-06, - "loss": 0.6223, + "epoch": 0.41, + "grad_norm": 1.52143293287606, + "learning_rate": 6.702239319819462e-06, + "loss": 0.5935, "step": 5746 }, { - "epoch": 0.6, - "grad_norm": 2.436792685729838, - "learning_rate": 3.567495613668188e-06, - "loss": 0.6495, + "epoch": 0.41, + "grad_norm": 1.6518236420419183, + "learning_rate": 6.701158755236443e-06, + "loss": 0.5012, "step": 5747 }, { - "epoch": 0.6, - "grad_norm": 2.2959050511929493, - "learning_rate": 3.565862900435028e-06, - "loss": 0.6366, + "epoch": 0.41, + "grad_norm": 2.0331026921423443, + "learning_rate": 6.700078100792456e-06, + "loss": 0.5662, "step": 5748 }, { - "epoch": 0.6, - "grad_norm": 3.0678303289148188, - "learning_rate": 3.564230353815772e-06, - "loss": 0.5961, + "epoch": 0.41, + "grad_norm": 1.592362983700345, + "learning_rate": 6.698997356544582e-06, + "loss": 0.5768, "step": 5749 }, { - "epoch": 0.61, - "grad_norm": 2.2062343340152553, - "learning_rate": 3.562597974000084e-06, - "loss": 0.5445, + "epoch": 0.41, + "grad_norm": 1.3722368487055976, + "learning_rate": 6.697916522549911e-06, + "loss": 0.5506, "step": 5750 }, { - "epoch": 0.61, - "grad_norm": 1.1459202020440926, - "learning_rate": 3.5609657611776055e-06, - "loss": 0.5367, + "epoch": 0.41, + "grad_norm": 1.7940171439446042, + "learning_rate": 6.696835598865535e-06, + "loss": 0.5959, "step": 5751 }, { - "epoch": 0.61, - "grad_norm": 2.892508323233743, - "learning_rate": 3.5593337155379663e-06, - "loss": 0.5296, + "epoch": 0.41, + "grad_norm": 1.6868269841295558, + "learning_rate": 6.695754585548554e-06, + "loss": 0.4601, "step": 5752 }, { - "epoch": 0.61, - "grad_norm": 13.502367015161873, - "learning_rate": 3.5577018372707706e-06, - "loss": 0.5599, + "epoch": 0.41, + "grad_norm": 1.5186106905035524, + "learning_rate": 6.694673482656069e-06, + "loss": 0.5474, "step": 5753 }, { - "epoch": 0.61, - "grad_norm": 4.60449934352309, - "learning_rate": 3.5560701265656096e-06, - "loss": 0.5771, + "epoch": 0.41, + "grad_norm": 1.6519051152536548, + "learning_rate": 6.69359229024519e-06, + "loss": 0.5828, "step": 5754 }, { - "epoch": 0.61, - "grad_norm": 2.0330026690570517, - "learning_rate": 3.5544385836120445e-06, - "loss": 0.6543, + "epoch": 0.41, + "grad_norm": 17.220139531461516, + "learning_rate": 6.692511008373026e-06, + "loss": 0.6098, "step": 5755 }, { - "epoch": 0.61, - "grad_norm": 2.246716338488602, - "learning_rate": 3.552807208599626e-06, - "loss": 0.6199, + "epoch": 0.41, + "grad_norm": 1.7882914488742574, + "learning_rate": 6.6914296370966946e-06, + "loss": 0.5359, "step": 5756 }, { - "epoch": 0.61, - "grad_norm": 2.880693301049812, - "learning_rate": 3.551176001717882e-06, - "loss": 0.5812, + "epoch": 0.41, + "grad_norm": 1.7018547298903768, + "learning_rate": 6.69034817647332e-06, + "loss": 0.5607, "step": 5757 }, { - "epoch": 0.61, - "grad_norm": 2.374133819647502, - "learning_rate": 3.549544963156324e-06, - "loss": 0.6039, + "epoch": 0.41, + "grad_norm": 1.6300845373513686, + "learning_rate": 6.689266626560027e-06, + "loss": 0.4767, "step": 5758 }, { - "epoch": 0.61, - "grad_norm": 3.651695196080499, - "learning_rate": 3.5479140931044393e-06, - "loss": 0.6026, + "epoch": 0.41, + "grad_norm": 1.6593647554783333, + "learning_rate": 6.688184987413946e-06, + "loss": 0.5007, "step": 5759 }, { - "epoch": 0.61, - "grad_norm": 2.256349484082301, - "learning_rate": 3.546283391751696e-06, - "loss": 0.5949, + "epoch": 0.41, + "grad_norm": 1.70715391962162, + "learning_rate": 6.687103259092214e-06, + "loss": 0.5641, "step": 5760 }, { - "epoch": 0.61, - "grad_norm": 2.2426448275819504, - "learning_rate": 3.5446528592875464e-06, - "loss": 0.6444, + "epoch": 0.41, + "grad_norm": 1.489071048716644, + "learning_rate": 6.686021441651972e-06, + "loss": 0.499, "step": 5761 }, { - "epoch": 0.61, - "grad_norm": 2.0973705329694723, - "learning_rate": 3.5430224959014215e-06, - "loss": 0.7379, + "epoch": 0.41, + "grad_norm": 0.8570617378494868, + "learning_rate": 6.684939535150363e-06, + "loss": 0.4424, "step": 5762 }, { - "epoch": 0.61, - "grad_norm": 2.2641582085188, - "learning_rate": 3.5413923017827317e-06, - "loss": 0.5749, + "epoch": 0.41, + "grad_norm": 1.5602427679855386, + "learning_rate": 6.68385753964454e-06, + "loss": 0.4831, "step": 5763 }, { - "epoch": 0.61, - "grad_norm": 3.058982375661702, - "learning_rate": 3.5397622771208663e-06, - "loss": 0.5927, + "epoch": 0.41, + "grad_norm": 0.7482368615049606, + "learning_rate": 6.682775455191655e-06, + "loss": 0.453, "step": 5764 }, { - "epoch": 0.61, - "grad_norm": 2.998975467492036, - "learning_rate": 3.5381324221051995e-06, - "loss": 0.611, + "epoch": 0.41, + "grad_norm": 1.723528573881567, + "learning_rate": 6.681693281848869e-06, + "loss": 0.5407, "step": 5765 }, { - "epoch": 0.61, - "grad_norm": 2.436460113249485, - "learning_rate": 3.5365027369250804e-06, - "loss": 0.5845, + "epoch": 0.41, + "grad_norm": 2.6196031432757465, + "learning_rate": 6.680611019673344e-06, + "loss": 0.5087, "step": 5766 }, { - "epoch": 0.61, - "grad_norm": 2.336543180347027, - "learning_rate": 3.5348732217698466e-06, - "loss": 0.6229, + "epoch": 0.41, + "grad_norm": 2.306307738471365, + "learning_rate": 6.679528668722252e-06, + "loss": 0.5362, "step": 5767 }, { - "epoch": 0.61, - "grad_norm": 3.005942928681562, - "learning_rate": 3.533243876828803e-06, - "loss": 0.6442, + "epoch": 0.41, + "grad_norm": 1.6948068804369243, + "learning_rate": 6.678446229052763e-06, + "loss": 0.5271, "step": 5768 }, { - "epoch": 0.61, - "grad_norm": 2.8740853995553226, - "learning_rate": 3.531614702291247e-06, - "loss": 0.5655, + "epoch": 0.41, + "grad_norm": 2.2205973624194906, + "learning_rate": 6.677363700722059e-06, + "loss": 0.5506, "step": 5769 }, { - "epoch": 0.61, - "grad_norm": 25.902005172145653, - "learning_rate": 3.5299856983464497e-06, - "loss": 0.6208, + "epoch": 0.41, + "grad_norm": 1.537629822859046, + "learning_rate": 6.67628108378732e-06, + "loss": 0.5332, "step": 5770 }, { - "epoch": 0.61, - "grad_norm": 2.499670510652518, - "learning_rate": 3.528356865183665e-06, - "loss": 0.5878, + "epoch": 0.41, + "grad_norm": 1.4724493193732056, + "learning_rate": 6.675198378305734e-06, + "loss": 0.4992, "step": 5771 }, { - "epoch": 0.61, - "grad_norm": 2.5307387633144414, - "learning_rate": 3.526728202992127e-06, - "loss": 0.7085, + "epoch": 0.41, + "grad_norm": 1.4876039510398744, + "learning_rate": 6.674115584334492e-06, + "loss": 0.5106, "step": 5772 }, { - "epoch": 0.61, - "grad_norm": 2.510774660981912, - "learning_rate": 3.525099711961045e-06, - "loss": 0.6365, + "epoch": 0.41, + "grad_norm": 1.6734011657560028, + "learning_rate": 6.673032701930793e-06, + "loss": 0.558, "step": 5773 }, { - "epoch": 0.61, - "grad_norm": 2.6261622750991034, - "learning_rate": 3.523471392279616e-06, - "loss": 0.6566, + "epoch": 0.41, + "grad_norm": 1.595548803551656, + "learning_rate": 6.671949731151836e-06, + "loss": 0.5845, "step": 5774 }, { - "epoch": 0.61, - "grad_norm": 2.445814884829749, - "learning_rate": 3.521843244137013e-06, - "loss": 0.7112, + "epoch": 0.41, + "grad_norm": 2.112351022890642, + "learning_rate": 6.670866672054832e-06, + "loss": 0.5575, "step": 5775 }, { - "epoch": 0.61, - "grad_norm": 3.355458478806883, - "learning_rate": 3.52021526772239e-06, - "loss": 0.6402, + "epoch": 0.41, + "grad_norm": 0.7433940607804329, + "learning_rate": 6.669783524696988e-06, + "loss": 0.484, "step": 5776 }, { - "epoch": 0.61, - "grad_norm": 2.3733230146293263, - "learning_rate": 3.5185874632248775e-06, - "loss": 0.6198, + "epoch": 0.41, + "grad_norm": 2.0134425174265953, + "learning_rate": 6.668700289135519e-06, + "loss": 0.5895, "step": 5777 }, { - "epoch": 0.61, - "grad_norm": 2.8989292888055886, - "learning_rate": 3.5169598308335915e-06, - "loss": 0.592, + "epoch": 0.41, + "grad_norm": 2.0297625476034855, + "learning_rate": 6.667616965427648e-06, + "loss": 0.4949, "step": 5778 }, { - "epoch": 0.61, - "grad_norm": 2.1665097816063295, - "learning_rate": 3.515332370737625e-06, - "loss": 0.669, + "epoch": 0.41, + "grad_norm": 0.7106069278250118, + "learning_rate": 6.666533553630596e-06, + "loss": 0.4579, "step": 5779 }, { - "epoch": 0.61, - "grad_norm": 1.005926002720759, - "learning_rate": 3.513705083126054e-06, - "loss": 0.6319, + "epoch": 0.41, + "grad_norm": 1.8146723135882608, + "learning_rate": 6.665450053801596e-06, + "loss": 0.5492, "step": 5780 }, { - "epoch": 0.61, - "grad_norm": 2.336459256200082, - "learning_rate": 3.5120779681879286e-06, - "loss": 0.6206, + "epoch": 0.41, + "grad_norm": 1.8171200575549025, + "learning_rate": 6.664366465997881e-06, + "loss": 0.5904, "step": 5781 }, { - "epoch": 0.61, - "grad_norm": 2.298134036866341, - "learning_rate": 3.5104510261122836e-06, - "loss": 0.6818, + "epoch": 0.41, + "grad_norm": 1.5678235917074674, + "learning_rate": 6.663282790276689e-06, + "loss": 0.4849, "step": 5782 }, { - "epoch": 0.61, - "grad_norm": 2.8034284235986724, - "learning_rate": 3.508824257088132e-06, - "loss": 0.653, + "epoch": 0.41, + "grad_norm": 1.5050471351965762, + "learning_rate": 6.662199026695264e-06, + "loss": 0.4832, "step": 5783 }, { - "epoch": 0.61, - "grad_norm": 3.123317385052922, - "learning_rate": 3.507197661304469e-06, - "loss": 0.6499, + "epoch": 0.41, + "grad_norm": 1.6138005637202333, + "learning_rate": 6.661115175310856e-06, + "loss": 0.5561, "step": 5784 }, { - "epoch": 0.61, - "grad_norm": 2.655862388540164, - "learning_rate": 3.505571238950267e-06, - "loss": 0.5429, + "epoch": 0.41, + "grad_norm": 2.327840322474769, + "learning_rate": 6.660031236180714e-06, + "loss": 0.541, "step": 5785 }, { - "epoch": 0.61, - "grad_norm": 2.490391865079292, - "learning_rate": 3.5039449902144763e-06, - "loss": 0.6385, + "epoch": 0.41, + "grad_norm": 1.8569371961890748, + "learning_rate": 6.658947209362098e-06, + "loss": 0.4884, "step": 5786 }, { - "epoch": 0.61, - "grad_norm": 4.384431381049101, - "learning_rate": 3.5023189152860325e-06, - "loss": 0.7115, + "epoch": 0.41, + "grad_norm": 1.8688859563962748, + "learning_rate": 6.657863094912268e-06, + "loss": 0.5406, "step": 5787 }, { - "epoch": 0.61, - "grad_norm": 2.6398628266384607, - "learning_rate": 3.5006930143538477e-06, - "loss": 0.6195, + "epoch": 0.41, + "grad_norm": 1.6048391504978559, + "learning_rate": 6.656778892888492e-06, + "loss": 0.5745, "step": 5788 }, { - "epoch": 0.61, - "grad_norm": 5.043162425725876, - "learning_rate": 3.499067287606817e-06, - "loss": 0.5859, + "epoch": 0.41, + "grad_norm": 1.9897227982079615, + "learning_rate": 6.655694603348042e-06, + "loss": 0.5137, "step": 5789 }, { - "epoch": 0.61, - "grad_norm": 2.925176048693361, - "learning_rate": 3.4974417352338074e-06, - "loss": 0.6185, + "epoch": 0.41, + "grad_norm": 1.7871049057054125, + "learning_rate": 6.65461022634819e-06, + "loss": 0.5713, "step": 5790 }, { - "epoch": 0.61, - "grad_norm": 2.3132796544844303, - "learning_rate": 3.495816357423674e-06, - "loss": 0.6343, + "epoch": 0.41, + "grad_norm": 1.6563575605708936, + "learning_rate": 6.6535257619462235e-06, + "loss": 0.4688, "step": 5791 }, { - "epoch": 0.61, - "grad_norm": 2.2778784069626004, - "learning_rate": 3.494191154365247e-06, - "loss": 0.6054, + "epoch": 0.41, + "grad_norm": 2.1114938412394264, + "learning_rate": 6.652441210199421e-06, + "loss": 0.6042, "step": 5792 }, { - "epoch": 0.61, - "grad_norm": 2.7460899113803503, - "learning_rate": 3.492566126247341e-06, - "loss": 0.6063, + "epoch": 0.41, + "grad_norm": 1.6606467203691937, + "learning_rate": 6.651356571165075e-06, + "loss": 0.5664, "step": 5793 }, { - "epoch": 0.61, - "grad_norm": 3.201639636764578, - "learning_rate": 3.4909412732587444e-06, - "loss": 0.6561, + "epoch": 0.41, + "grad_norm": 1.640992918396469, + "learning_rate": 6.650271844900479e-06, + "loss": 0.4725, "step": 5794 }, { - "epoch": 0.61, - "grad_norm": 2.547089477361622, - "learning_rate": 3.4893165955882275e-06, - "loss": 0.5957, + "epoch": 0.41, + "grad_norm": 1.939534503436713, + "learning_rate": 6.649187031462932e-06, + "loss": 0.5927, "step": 5795 }, { - "epoch": 0.61, - "grad_norm": 2.598764658378845, - "learning_rate": 3.4876920934245423e-06, - "loss": 0.6941, + "epoch": 0.41, + "grad_norm": 3.027831462441318, + "learning_rate": 6.648102130909739e-06, + "loss": 0.6497, "step": 5796 }, { - "epoch": 0.61, - "grad_norm": 2.3890690610014795, - "learning_rate": 3.486067766956418e-06, - "loss": 0.6066, + "epoch": 0.41, + "grad_norm": 1.7887985959466541, + "learning_rate": 6.647017143298205e-06, + "loss": 0.6044, "step": 5797 }, { - "epoch": 0.61, - "grad_norm": 2.9128726002118785, - "learning_rate": 3.4844436163725648e-06, - "loss": 0.5642, + "epoch": 0.41, + "grad_norm": 2.1472573308679497, + "learning_rate": 6.645932068685645e-06, + "loss": 0.5699, "step": 5798 }, { - "epoch": 0.61, - "grad_norm": 3.8378397404411326, - "learning_rate": 3.48281964186167e-06, - "loss": 0.6248, + "epoch": 0.41, + "grad_norm": 2.2014938565627316, + "learning_rate": 6.644846907129378e-06, + "loss": 0.4672, "step": 5799 }, { - "epoch": 0.61, - "grad_norm": 2.3946114293745913, - "learning_rate": 3.4811958436124036e-06, - "loss": 0.6376, + "epoch": 0.41, + "grad_norm": 1.489588528129715, + "learning_rate": 6.643761658686723e-06, + "loss": 0.5382, "step": 5800 }, { - "epoch": 0.61, - "grad_norm": 2.903630763714226, - "learning_rate": 3.479572221813413e-06, - "loss": 0.5643, + "epoch": 0.41, + "grad_norm": 2.1872491566097336, + "learning_rate": 6.642676323415007e-06, + "loss": 0.5429, "step": 5801 }, { - "epoch": 0.61, - "grad_norm": 5.522387352098266, - "learning_rate": 3.4779487766533306e-06, - "loss": 0.6095, + "epoch": 0.41, + "grad_norm": 1.4858081630269468, + "learning_rate": 6.64159090137156e-06, + "loss": 0.5103, "step": 5802 }, { - "epoch": 0.61, - "grad_norm": 2.539618765581498, - "learning_rate": 3.4763255083207547e-06, - "loss": 0.5834, + "epoch": 0.41, + "grad_norm": 1.6646291862997982, + "learning_rate": 6.6405053926137205e-06, + "loss": 0.5529, "step": 5803 }, { - "epoch": 0.61, - "grad_norm": 3.498863654137628, - "learning_rate": 3.4747024170042785e-06, - "loss": 0.6581, + "epoch": 0.41, + "grad_norm": 2.7234757898647746, + "learning_rate": 6.6394197971988275e-06, + "loss": 0.5163, "step": 5804 }, { - "epoch": 0.61, - "grad_norm": 2.0699886181482836, - "learning_rate": 3.473079502892466e-06, - "loss": 0.686, + "epoch": 0.41, + "grad_norm": 1.7045573649277839, + "learning_rate": 6.638334115184225e-06, + "loss": 0.5393, "step": 5805 }, { - "epoch": 0.61, - "grad_norm": 2.56411416397428, - "learning_rate": 3.4714567661738635e-06, - "loss": 0.6205, + "epoch": 0.41, + "grad_norm": 1.59100620795315, + "learning_rate": 6.637248346627264e-06, + "loss": 0.5761, "step": 5806 }, { - "epoch": 0.61, - "grad_norm": 3.6804375443854056, - "learning_rate": 3.469834207036996e-06, - "loss": 0.5698, + "epoch": 0.41, + "grad_norm": 1.65334459440667, + "learning_rate": 6.636162491585298e-06, + "loss": 0.5945, "step": 5807 }, { - "epoch": 0.61, - "grad_norm": 2.3140301754849704, - "learning_rate": 3.4682118256703657e-06, - "loss": 0.6665, + "epoch": 0.41, + "grad_norm": 1.8939127780484482, + "learning_rate": 6.6350765501156844e-06, + "loss": 0.5611, "step": 5808 }, { - "epoch": 0.61, - "grad_norm": 5.132346843122038, - "learning_rate": 3.4665896222624585e-06, - "loss": 0.6473, + "epoch": 0.41, + "grad_norm": 1.8084174524423957, + "learning_rate": 6.633990522275786e-06, + "loss": 0.5581, "step": 5809 }, { - "epoch": 0.61, - "grad_norm": 2.1626805670201685, - "learning_rate": 3.4649675970017355e-06, - "loss": 0.5748, + "epoch": 0.41, + "grad_norm": 3.255443769800256, + "learning_rate": 6.6329044081229735e-06, + "loss": 0.5735, "step": 5810 }, { - "epoch": 0.61, - "grad_norm": 2.6486782602012684, - "learning_rate": 3.4633457500766413e-06, - "loss": 0.6031, + "epoch": 0.41, + "grad_norm": 1.5527562812405404, + "learning_rate": 6.631818207714614e-06, + "loss": 0.6108, "step": 5811 }, { - "epoch": 0.61, - "grad_norm": 2.5370141596453624, - "learning_rate": 3.4617240816755937e-06, - "loss": 0.5877, + "epoch": 0.41, + "grad_norm": 1.8891091345518953, + "learning_rate": 6.6307319211080915e-06, + "loss": 0.524, "step": 5812 }, { - "epoch": 0.61, - "grad_norm": 2.5534598070476, - "learning_rate": 3.460102591986997e-06, - "loss": 0.6301, + "epoch": 0.41, + "grad_norm": 0.8597358763544644, + "learning_rate": 6.629645548360781e-06, + "loss": 0.4474, "step": 5813 }, { - "epoch": 0.61, - "grad_norm": 3.3454701642307962, - "learning_rate": 3.4584812811992287e-06, - "loss": 0.6339, + "epoch": 0.41, + "grad_norm": 1.590928097655433, + "learning_rate": 6.628559089530071e-06, + "loss": 0.5393, "step": 5814 }, { - "epoch": 0.61, - "grad_norm": 3.0095813678630763, - "learning_rate": 3.4568601495006503e-06, - "loss": 0.6619, + "epoch": 0.41, + "grad_norm": 0.8598403080943261, + "learning_rate": 6.62747254467335e-06, + "loss": 0.4767, "step": 5815 }, { - "epoch": 0.61, - "grad_norm": 2.4805455518296835, - "learning_rate": 3.4552391970795984e-06, - "loss": 0.6543, + "epoch": 0.41, + "grad_norm": 1.8482831175420178, + "learning_rate": 6.626385913848017e-06, + "loss": 0.5926, "step": 5816 }, { - "epoch": 0.61, - "grad_norm": 2.491050633881681, - "learning_rate": 3.453618424124392e-06, - "loss": 0.6431, + "epoch": 0.41, + "grad_norm": 1.4200643022178265, + "learning_rate": 6.625299197111468e-06, + "loss": 0.4558, "step": 5817 }, { - "epoch": 0.61, - "grad_norm": 5.733199748643421, - "learning_rate": 3.451997830823325e-06, - "loss": 0.5999, + "epoch": 0.41, + "grad_norm": 2.5643562092642345, + "learning_rate": 6.624212394521108e-06, + "loss": 0.5199, "step": 5818 }, { - "epoch": 0.61, - "grad_norm": 3.0026297623529605, - "learning_rate": 3.4503774173646767e-06, - "loss": 0.5972, + "epoch": 0.41, + "grad_norm": 1.5958436966698553, + "learning_rate": 6.6231255061343455e-06, + "loss": 0.5348, "step": 5819 }, { - "epoch": 0.61, - "grad_norm": 3.1106086481537787, - "learning_rate": 3.448757183936701e-06, - "loss": 0.6202, + "epoch": 0.41, + "grad_norm": 1.514106329736402, + "learning_rate": 6.622038532008595e-06, + "loss": 0.5767, "step": 5820 }, { - "epoch": 0.61, - "grad_norm": 5.983654597238341, - "learning_rate": 3.447137130727629e-06, - "loss": 0.6625, + "epoch": 0.41, + "grad_norm": 1.5610319558934846, + "learning_rate": 6.620951472201273e-06, + "loss": 0.575, "step": 5821 }, { - "epoch": 0.61, - "grad_norm": 3.81371149233971, - "learning_rate": 3.4455172579256784e-06, - "loss": 0.6177, + "epoch": 0.41, + "grad_norm": 1.837319649847454, + "learning_rate": 6.619864326769801e-06, + "loss": 0.5196, "step": 5822 }, { - "epoch": 0.61, - "grad_norm": 2.458016439088697, - "learning_rate": 3.4438975657190375e-06, - "loss": 0.7018, - "step": 5823 + "epoch": 0.41, + "grad_norm": 1.9556621972093151, + "learning_rate": 6.618777095771607e-06, + "loss": 0.5694, + "step": 5823 }, { - "epoch": 0.61, - "grad_norm": 1.9033971357544983, - "learning_rate": 3.442278054295883e-06, - "loss": 0.5718, + "epoch": 0.41, + "grad_norm": 1.616852612292827, + "learning_rate": 6.617689779264121e-06, + "loss": 0.5851, "step": 5824 }, { - "epoch": 0.61, - "grad_norm": 2.9518489778855823, - "learning_rate": 3.440658723844358e-06, - "loss": 0.6359, + "epoch": 0.41, + "grad_norm": 1.4980949960208676, + "learning_rate": 6.6166023773047785e-06, + "loss": 0.5159, "step": 5825 }, { - "epoch": 0.61, - "grad_norm": 2.6141808916194367, - "learning_rate": 3.439039574552595e-06, - "loss": 0.5865, + "epoch": 0.41, + "grad_norm": 6.318601235617634, + "learning_rate": 6.615514889951021e-06, + "loss": 0.5464, "step": 5826 }, { - "epoch": 0.61, - "grad_norm": 2.2429938266365808, - "learning_rate": 3.437420606608701e-06, - "loss": 0.6993, + "epoch": 0.41, + "grad_norm": 2.3174452650312207, + "learning_rate": 6.6144273172602945e-06, + "loss": 0.5894, "step": 5827 }, { - "epoch": 0.61, - "grad_norm": 2.1988445058564614, - "learning_rate": 3.435801820200767e-06, - "loss": 0.6538, + "epoch": 0.41, + "grad_norm": 2.144094295253536, + "learning_rate": 6.613339659290046e-06, + "loss": 0.5694, "step": 5828 }, { - "epoch": 0.61, - "grad_norm": 2.093978386990422, - "learning_rate": 3.4341832155168547e-06, - "loss": 0.6003, + "epoch": 0.41, + "grad_norm": 1.919415252851093, + "learning_rate": 6.61225191609773e-06, + "loss": 0.5155, "step": 5829 }, { - "epoch": 0.61, - "grad_norm": 2.311995229831535, - "learning_rate": 3.432564792745009e-06, - "loss": 0.6298, + "epoch": 0.41, + "grad_norm": 1.594499950881667, + "learning_rate": 6.611164087740803e-06, + "loss": 0.5214, "step": 5830 }, { - "epoch": 0.61, - "grad_norm": 2.324588374264805, - "learning_rate": 3.4309465520732556e-06, - "loss": 0.5572, + "epoch": 0.41, + "grad_norm": 1.6892905245560006, + "learning_rate": 6.610076174276731e-06, + "loss": 0.5474, "step": 5831 }, { - "epoch": 0.61, - "grad_norm": 1.0044411358111915, - "learning_rate": 3.4293284936895956e-06, - "loss": 0.548, + "epoch": 0.41, + "grad_norm": 1.5805599373524648, + "learning_rate": 6.608988175762981e-06, + "loss": 0.5949, "step": 5832 }, { - "epoch": 0.61, - "grad_norm": 4.285667049229145, - "learning_rate": 3.4277106177820123e-06, - "loss": 0.6496, + "epoch": 0.41, + "grad_norm": 1.6184606368302594, + "learning_rate": 6.607900092257021e-06, + "loss": 0.5287, "step": 5833 }, { - "epoch": 0.61, - "grad_norm": 2.389684858763753, - "learning_rate": 3.426092924538462e-06, - "loss": 0.654, + "epoch": 0.41, + "grad_norm": 1.8338095688541032, + "learning_rate": 6.606811923816331e-06, + "loss": 0.5692, "step": 5834 }, { - "epoch": 0.61, - "grad_norm": 2.2261660392871327, - "learning_rate": 3.4244754141468878e-06, - "loss": 0.7062, + "epoch": 0.41, + "grad_norm": 1.7563597841212846, + "learning_rate": 6.6057236704983915e-06, + "loss": 0.5232, "step": 5835 }, { - "epoch": 0.61, - "grad_norm": 2.237433451917938, - "learning_rate": 3.4228580867952044e-06, - "loss": 0.6249, + "epoch": 0.41, + "grad_norm": 1.3992834644711782, + "learning_rate": 6.6046353323606845e-06, + "loss": 0.521, "step": 5836 }, { - "epoch": 0.61, - "grad_norm": 3.0863620341722386, - "learning_rate": 3.421240942671312e-06, - "loss": 0.6019, + "epoch": 0.41, + "grad_norm": 0.7724057314054366, + "learning_rate": 6.603546909460704e-06, + "loss": 0.4573, "step": 5837 }, { - "epoch": 0.61, - "grad_norm": 2.5102635909640636, - "learning_rate": 3.4196239819630806e-06, - "loss": 0.5542, + "epoch": 0.41, + "grad_norm": 1.9577311329599112, + "learning_rate": 6.602458401855942e-06, + "loss": 0.5167, "step": 5838 }, { - "epoch": 0.61, - "grad_norm": 2.377771418582857, - "learning_rate": 3.4180072048583667e-06, - "loss": 0.7179, + "epoch": 0.41, + "grad_norm": 2.0186379592615666, + "learning_rate": 6.601369809603897e-06, + "loss": 0.5243, "step": 5839 }, { - "epoch": 0.61, - "grad_norm": 2.8495071063526494, - "learning_rate": 3.4163906115450025e-06, - "loss": 0.6351, + "epoch": 0.41, + "grad_norm": 1.4578422557147754, + "learning_rate": 6.600281132762072e-06, + "loss": 0.5176, "step": 5840 }, { - "epoch": 0.61, - "grad_norm": 4.504564584687121, - "learning_rate": 3.4147742022108e-06, - "loss": 0.5398, + "epoch": 0.41, + "grad_norm": 1.6732457955302167, + "learning_rate": 6.599192371387975e-06, + "loss": 0.531, "step": 5841 }, { - "epoch": 0.61, - "grad_norm": 2.7848784093251355, - "learning_rate": 3.4131579770435495e-06, - "loss": 0.67, + "epoch": 0.41, + "grad_norm": 1.767581631015389, + "learning_rate": 6.598103525539119e-06, + "loss": 0.5482, "step": 5842 }, { - "epoch": 0.61, - "grad_norm": 2.7193060597973844, - "learning_rate": 3.411541936231016e-06, - "loss": 0.6779, + "epoch": 0.41, + "grad_norm": 1.6960501511413577, + "learning_rate": 6.597014595273019e-06, + "loss": 0.5367, "step": 5843 }, { - "epoch": 0.61, - "grad_norm": 2.9893759030534155, - "learning_rate": 3.409926079960949e-06, - "loss": 0.5734, + "epoch": 0.41, + "grad_norm": 2.060741112499225, + "learning_rate": 6.595925580647197e-06, + "loss": 0.6372, "step": 5844 }, { - "epoch": 0.62, - "grad_norm": 2.402119046084388, - "learning_rate": 3.4083104084210746e-06, - "loss": 0.7261, + "epoch": 0.41, + "grad_norm": 1.4729163491576367, + "learning_rate": 6.594836481719179e-06, + "loss": 0.5636, "step": 5845 }, { - "epoch": 0.62, - "grad_norm": 3.2792238487813976, - "learning_rate": 3.4066949217990964e-06, - "loss": 0.6428, + "epoch": 0.41, + "grad_norm": 1.6451339460813295, + "learning_rate": 6.593747298546493e-06, + "loss": 0.5496, "step": 5846 }, { - "epoch": 0.62, - "grad_norm": 2.754234915442587, - "learning_rate": 3.4050796202826943e-06, - "loss": 0.6201, + "epoch": 0.41, + "grad_norm": 1.5021800487458752, + "learning_rate": 6.592658031186675e-06, + "loss": 0.5012, "step": 5847 }, { - "epoch": 0.62, - "grad_norm": 2.5663658021698423, - "learning_rate": 3.4034645040595325e-06, - "loss": 0.6867, + "epoch": 0.41, + "grad_norm": 1.498489538791036, + "learning_rate": 6.591568679697262e-06, + "loss": 0.5169, "step": 5848 }, { - "epoch": 0.62, - "grad_norm": 2.808416018507515, - "learning_rate": 3.4018495733172485e-06, - "loss": 0.7043, + "epoch": 0.42, + "grad_norm": 1.8280574222427548, + "learning_rate": 6.590479244135799e-06, + "loss": 0.5591, "step": 5849 }, { - "epoch": 0.62, - "grad_norm": 2.945233847925162, - "learning_rate": 3.4002348282434637e-06, - "loss": 0.5994, + "epoch": 0.42, + "grad_norm": 1.5006993796623003, + "learning_rate": 6.589389724559834e-06, + "loss": 0.5256, "step": 5850 }, { - "epoch": 0.62, - "grad_norm": 2.242388743778958, - "learning_rate": 3.3986202690257707e-06, - "loss": 0.689, + "epoch": 0.42, + "grad_norm": 1.8584691369104496, + "learning_rate": 6.588300121026916e-06, + "loss": 0.5904, "step": 5851 }, { - "epoch": 0.62, - "grad_norm": 2.706078895924368, - "learning_rate": 3.397005895851746e-06, - "loss": 0.6305, + "epoch": 0.42, + "grad_norm": 1.5129653397107585, + "learning_rate": 6.5872104335946055e-06, + "loss": 0.4931, "step": 5852 }, { - "epoch": 0.62, - "grad_norm": 2.4900554279730427, - "learning_rate": 3.395391708908941e-06, - "loss": 0.6242, + "epoch": 0.42, + "grad_norm": 1.6003486391258779, + "learning_rate": 6.586120662320461e-06, + "loss": 0.4777, "step": 5853 }, { - "epoch": 0.62, - "grad_norm": 3.2235848293555365, - "learning_rate": 3.39377770838489e-06, - "loss": 0.6205, + "epoch": 0.42, + "grad_norm": 1.90456694502086, + "learning_rate": 6.585030807262048e-06, + "loss": 0.5859, "step": 5854 }, { - "epoch": 0.62, - "grad_norm": 2.1780532875830025, - "learning_rate": 3.392163894467103e-06, - "loss": 0.6479, + "epoch": 0.42, + "grad_norm": 1.6676159434966487, + "learning_rate": 6.5839408684769365e-06, + "loss": 0.5708, "step": 5855 }, { - "epoch": 0.62, - "grad_norm": 2.180928205646365, - "learning_rate": 3.3905502673430648e-06, - "loss": 0.6474, + "epoch": 0.42, + "grad_norm": 1.5249454621712055, + "learning_rate": 6.582850846022703e-06, + "loss": 0.5564, "step": 5856 }, { - "epoch": 0.62, - "grad_norm": 2.0354934950603507, - "learning_rate": 3.3889368272002455e-06, - "loss": 0.6783, + "epoch": 0.42, + "grad_norm": 1.721881061115242, + "learning_rate": 6.581760739956924e-06, + "loss": 0.5733, "step": 5857 }, { - "epoch": 0.62, - "grad_norm": 13.984789819926613, - "learning_rate": 3.387323574226087e-06, - "loss": 0.6263, + "epoch": 0.42, + "grad_norm": 1.6009569827800252, + "learning_rate": 6.580670550337182e-06, + "loss": 0.4962, "step": 5858 }, { - "epoch": 0.62, - "grad_norm": 2.841089761628964, - "learning_rate": 3.385710508608017e-06, - "loss": 0.6473, + "epoch": 0.42, + "grad_norm": 1.8335563834269406, + "learning_rate": 6.579580277221067e-06, + "loss": 0.6091, "step": 5859 }, { - "epoch": 0.62, - "grad_norm": 4.189754673822482, - "learning_rate": 3.3840976305334304e-06, - "loss": 0.5959, + "epoch": 0.42, + "grad_norm": 1.6165275464339568, + "learning_rate": 6.578489920666168e-06, + "loss": 0.536, "step": 5860 }, { - "epoch": 0.62, - "grad_norm": 2.5889741809528988, - "learning_rate": 3.382484940189711e-06, - "loss": 0.6292, + "epoch": 0.42, + "grad_norm": 0.763836441011334, + "learning_rate": 6.5773994807300845e-06, + "loss": 0.4207, "step": 5861 }, { - "epoch": 0.62, - "grad_norm": 2.289269598614149, - "learning_rate": 3.380872437764215e-06, - "loss": 0.6444, + "epoch": 0.42, + "grad_norm": 1.8510295611923626, + "learning_rate": 6.576308957470414e-06, + "loss": 0.5294, "step": 5862 }, { - "epoch": 0.62, - "grad_norm": 2.422036993894899, - "learning_rate": 3.37926012344428e-06, - "loss": 0.5488, + "epoch": 0.42, + "grad_norm": 2.7844298885539325, + "learning_rate": 6.575218350944763e-06, + "loss": 0.4971, "step": 5863 }, { - "epoch": 0.62, - "grad_norm": 2.456124041276871, - "learning_rate": 3.3776479974172184e-06, - "loss": 0.6358, + "epoch": 0.42, + "grad_norm": 1.6406743511317718, + "learning_rate": 6.574127661210741e-06, + "loss": 0.5951, "step": 5864 }, { - "epoch": 0.62, - "grad_norm": 2.4906368084019817, - "learning_rate": 3.3760360598703217e-06, - "loss": 0.5991, + "epoch": 0.42, + "grad_norm": 1.9972307078304927, + "learning_rate": 6.573036888325963e-06, + "loss": 0.5772, "step": 5865 }, { - "epoch": 0.62, - "grad_norm": 2.6266749863791805, - "learning_rate": 3.374424310990862e-06, - "loss": 0.6148, + "epoch": 0.42, + "grad_norm": 2.0482434941905243, + "learning_rate": 6.571946032348046e-06, + "loss": 0.5391, "step": 5866 }, { - "epoch": 0.62, - "grad_norm": 2.513373641942084, - "learning_rate": 3.372812750966087e-06, - "loss": 0.6433, + "epoch": 0.42, + "grad_norm": 1.6305100511765362, + "learning_rate": 6.570855093334614e-06, + "loss": 0.5649, "step": 5867 }, { - "epoch": 0.62, - "grad_norm": 2.884160237208583, - "learning_rate": 3.371201379983223e-06, - "loss": 0.5771, + "epoch": 0.42, + "grad_norm": 2.3945410403814242, + "learning_rate": 6.5697640713432906e-06, + "loss": 0.5004, "step": 5868 }, { - "epoch": 0.62, - "grad_norm": 3.2671886681135485, - "learning_rate": 3.369590198229473e-06, - "loss": 0.5949, + "epoch": 0.42, + "grad_norm": 1.417007383330024, + "learning_rate": 6.568672966431711e-06, + "loss": 0.4867, "step": 5869 }, { - "epoch": 0.62, - "grad_norm": 2.2010932339105223, - "learning_rate": 3.3679792058920223e-06, - "loss": 0.677, + "epoch": 0.42, + "grad_norm": 1.6052677574956207, + "learning_rate": 6.56758177865751e-06, + "loss": 0.6394, "step": 5870 }, { - "epoch": 0.62, - "grad_norm": 2.8453784039703374, - "learning_rate": 3.366368403158028e-06, - "loss": 0.604, + "epoch": 0.42, + "grad_norm": 1.543452467672601, + "learning_rate": 6.566490508078328e-06, + "loss": 0.531, "step": 5871 }, { - "epoch": 0.62, - "grad_norm": 2.641273584480205, - "learning_rate": 3.3647577902146334e-06, - "loss": 0.6553, + "epoch": 0.42, + "grad_norm": 1.950585766806838, + "learning_rate": 6.56539915475181e-06, + "loss": 0.5164, "step": 5872 }, { - "epoch": 0.62, - "grad_norm": 2.3540207447949997, - "learning_rate": 3.363147367248949e-06, - "loss": 0.6874, + "epoch": 0.42, + "grad_norm": 1.4275478282152232, + "learning_rate": 6.564307718735604e-06, + "loss": 0.5028, "step": 5873 }, { - "epoch": 0.62, - "grad_norm": 2.1061808611226644, - "learning_rate": 3.3615371344480725e-06, - "loss": 0.5198, + "epoch": 0.42, + "grad_norm": 1.5625520150603835, + "learning_rate": 6.563216200087364e-06, + "loss": 0.4954, "step": 5874 }, { - "epoch": 0.62, - "grad_norm": 3.610784500535765, - "learning_rate": 3.3599270919990744e-06, - "loss": 0.6982, + "epoch": 0.42, + "grad_norm": 1.6775833577708843, + "learning_rate": 6.562124598864748e-06, + "loss": 0.6345, "step": 5875 }, { - "epoch": 0.62, - "grad_norm": 17.35217476906217, - "learning_rate": 3.358317240089008e-06, - "loss": 0.6382, + "epoch": 0.42, + "grad_norm": 1.7764478091407114, + "learning_rate": 6.561032915125416e-06, + "loss": 0.5393, "step": 5876 }, { - "epoch": 0.62, - "grad_norm": 2.3499225456097874, - "learning_rate": 3.3567075789048973e-06, - "loss": 0.6571, + "epoch": 0.42, + "grad_norm": 1.5300578567796672, + "learning_rate": 6.559941148927035e-06, + "loss": 0.5924, "step": 5877 }, { - "epoch": 0.62, - "grad_norm": 4.846781393555933, - "learning_rate": 3.35509810863375e-06, - "loss": 0.7104, + "epoch": 0.42, + "grad_norm": 1.6533778929847478, + "learning_rate": 6.5588493003272794e-06, + "loss": 0.569, "step": 5878 }, { - "epoch": 0.62, - "grad_norm": 3.0399172317674497, - "learning_rate": 3.35348882946255e-06, - "loss": 0.6536, + "epoch": 0.42, + "grad_norm": 1.6081161954902436, + "learning_rate": 6.557757369383819e-06, + "loss": 0.5254, "step": 5879 }, { - "epoch": 0.62, - "grad_norm": 3.2181809417118203, - "learning_rate": 3.3518797415782577e-06, - "loss": 0.5946, + "epoch": 0.42, + "grad_norm": 1.679702874401066, + "learning_rate": 6.556665356154338e-06, + "loss": 0.5074, "step": 5880 }, { - "epoch": 0.62, - "grad_norm": 3.7651214632668175, - "learning_rate": 3.3502708451678145e-06, - "loss": 0.6062, + "epoch": 0.42, + "grad_norm": 1.7384737418466396, + "learning_rate": 6.555573260696517e-06, + "loss": 0.5744, "step": 5881 }, { - "epoch": 0.62, - "grad_norm": 2.5978846672711, - "learning_rate": 3.348662140418133e-06, - "loss": 0.6363, + "epoch": 0.42, + "grad_norm": 1.6065862374810773, + "learning_rate": 6.554481083068047e-06, + "loss": 0.5422, "step": 5882 }, { - "epoch": 0.62, - "grad_norm": 7.165618488483955, - "learning_rate": 3.3470536275161126e-06, - "loss": 0.5837, + "epoch": 0.42, + "grad_norm": 1.6710795018254225, + "learning_rate": 6.553388823326617e-06, + "loss": 0.6401, "step": 5883 }, { - "epoch": 0.62, - "grad_norm": 3.6883277185410073, - "learning_rate": 3.3454453066486214e-06, - "loss": 0.6308, + "epoch": 0.42, + "grad_norm": 1.684213221474878, + "learning_rate": 6.552296481529927e-06, + "loss": 0.5538, "step": 5884 }, { - "epoch": 0.62, - "grad_norm": 2.1630804016878504, - "learning_rate": 3.3438371780025136e-06, - "loss": 0.6886, + "epoch": 0.42, + "grad_norm": 8.780768900918462, + "learning_rate": 6.551204057735676e-06, + "loss": 0.5385, "step": 5885 }, { - "epoch": 0.62, - "grad_norm": 2.269499258419974, - "learning_rate": 3.342229241764615e-06, - "loss": 0.6706, + "epoch": 0.42, + "grad_norm": 1.782050817111433, + "learning_rate": 6.55011155200157e-06, + "loss": 0.5255, "step": 5886 }, { - "epoch": 0.62, - "grad_norm": 2.3398677700347497, - "learning_rate": 3.34062149812173e-06, - "loss": 0.6235, + "epoch": 0.42, + "grad_norm": 1.5651635346159511, + "learning_rate": 6.5490189643853204e-06, + "loss": 0.4805, "step": 5887 }, { - "epoch": 0.62, - "grad_norm": 2.507757222319039, - "learning_rate": 3.339013947260642e-06, - "loss": 0.5739, + "epoch": 0.42, + "grad_norm": 1.9920381791642292, + "learning_rate": 6.547926294944641e-06, + "loss": 0.5692, "step": 5888 }, { - "epoch": 0.62, - "grad_norm": 2.9578807377909313, - "learning_rate": 3.337406589368113e-06, - "loss": 0.6063, + "epoch": 0.42, + "grad_norm": 1.9985165058227028, + "learning_rate": 6.546833543737249e-06, + "loss": 0.5639, "step": 5889 }, { - "epoch": 0.62, - "grad_norm": 2.2486291156021263, - "learning_rate": 3.3357994246308815e-06, - "loss": 0.6258, + "epoch": 0.42, + "grad_norm": 5.308138930059173, + "learning_rate": 6.545740710820867e-06, + "loss": 0.4833, "step": 5890 }, { - "epoch": 0.62, - "grad_norm": 2.4560317757961596, - "learning_rate": 3.3341924532356605e-06, - "loss": 0.6704, + "epoch": 0.42, + "grad_norm": 1.6520408835743527, + "learning_rate": 6.544647796253223e-06, + "loss": 0.5861, "step": 5891 }, { - "epoch": 0.62, - "grad_norm": 2.4000882896893856, - "learning_rate": 3.3325856753691453e-06, - "loss": 0.6653, + "epoch": 0.42, + "grad_norm": 1.5230205505046615, + "learning_rate": 6.543554800092049e-06, + "loss": 0.526, "step": 5892 }, { - "epoch": 0.62, - "grad_norm": 2.309148094791573, - "learning_rate": 3.3309790912180056e-06, - "loss": 0.5464, + "epoch": 0.42, + "grad_norm": 1.4137261112697015, + "learning_rate": 6.542461722395082e-06, + "loss": 0.4702, "step": 5893 }, { - "epoch": 0.62, - "grad_norm": 2.9793468104494654, - "learning_rate": 3.329372700968894e-06, - "loss": 0.6522, + "epoch": 0.42, + "grad_norm": 1.4328149194260784, + "learning_rate": 6.541368563220056e-06, + "loss": 0.5226, "step": 5894 }, { - "epoch": 0.62, - "grad_norm": 3.234552857190179, - "learning_rate": 3.3277665048084283e-06, - "loss": 0.6763, + "epoch": 0.42, + "grad_norm": 1.9595813454904862, + "learning_rate": 6.540275322624724e-06, + "loss": 0.61, "step": 5895 }, { - "epoch": 0.62, - "grad_norm": 2.305618418961012, - "learning_rate": 3.326160502923218e-06, - "loss": 0.5466, + "epoch": 0.42, + "grad_norm": 1.5553627532159, + "learning_rate": 6.539182000666828e-06, + "loss": 0.5479, "step": 5896 }, { - "epoch": 0.62, - "grad_norm": 2.4393905813677113, - "learning_rate": 3.324554695499841e-06, - "loss": 0.6869, + "epoch": 0.42, + "grad_norm": 2.073676791399435, + "learning_rate": 6.538088597404124e-06, + "loss": 0.5596, "step": 5897 }, { - "epoch": 0.62, - "grad_norm": 3.1678663773433686, - "learning_rate": 3.3229490827248585e-06, - "loss": 0.6572, + "epoch": 0.42, + "grad_norm": 2.237645027137355, + "learning_rate": 6.536995112894368e-06, + "loss": 0.5821, "step": 5898 }, { - "epoch": 0.62, - "grad_norm": 2.810314446746764, - "learning_rate": 3.3213436647848017e-06, - "loss": 0.6039, + "epoch": 0.42, + "grad_norm": 1.955893069975835, + "learning_rate": 6.535901547195322e-06, + "loss": 0.5331, "step": 5899 }, { - "epoch": 0.62, - "grad_norm": 2.616553362514333, - "learning_rate": 3.3197384418661854e-06, - "loss": 0.6404, + "epoch": 0.42, + "grad_norm": 1.8739029115448256, + "learning_rate": 6.534807900364751e-06, + "loss": 0.6017, "step": 5900 }, { - "epoch": 0.62, - "grad_norm": 2.606456810157447, - "learning_rate": 3.3181334141555003e-06, - "loss": 0.6813, + "epoch": 0.42, + "grad_norm": 1.6709628465706683, + "learning_rate": 6.533714172460427e-06, + "loss": 0.5285, "step": 5901 }, { - "epoch": 0.62, - "grad_norm": 3.352922919680765, - "learning_rate": 3.3165285818392133e-06, - "loss": 0.5888, + "epoch": 0.42, + "grad_norm": 2.209478194424544, + "learning_rate": 6.532620363540124e-06, + "loss": 0.577, "step": 5902 }, { - "epoch": 0.62, - "grad_norm": 3.765095323296954, - "learning_rate": 3.3149239451037706e-06, - "loss": 0.6115, + "epoch": 0.42, + "grad_norm": 2.359487017867562, + "learning_rate": 6.53152647366162e-06, + "loss": 0.5293, "step": 5903 }, { - "epoch": 0.62, - "grad_norm": 2.0882597233379716, - "learning_rate": 3.313319504135591e-06, - "loss": 0.6357, + "epoch": 0.42, + "grad_norm": 1.5308789261698046, + "learning_rate": 6.530432502882699e-06, + "loss": 0.4882, "step": 5904 }, { - "epoch": 0.62, - "grad_norm": 2.3840323224262576, - "learning_rate": 3.3117152591210765e-06, - "loss": 0.6076, + "epoch": 0.42, + "grad_norm": 2.397453191396718, + "learning_rate": 6.5293384512611455e-06, + "loss": 0.5311, "step": 5905 }, { - "epoch": 0.62, - "grad_norm": 12.879550786415358, - "learning_rate": 3.3101112102466014e-06, - "loss": 0.6847, + "epoch": 0.42, + "grad_norm": 1.869570671392317, + "learning_rate": 6.528244318854754e-06, + "loss": 0.5529, "step": 5906 }, { - "epoch": 0.62, - "grad_norm": 0.9258667628138425, - "learning_rate": 3.308507357698525e-06, - "loss": 0.5716, + "epoch": 0.42, + "grad_norm": 1.444901815728329, + "learning_rate": 6.5271501057213185e-06, + "loss": 0.5014, "step": 5907 }, { - "epoch": 0.62, - "grad_norm": 2.468162429286832, - "learning_rate": 3.30690370166317e-06, - "loss": 0.6944, + "epoch": 0.42, + "grad_norm": 1.6260663147828214, + "learning_rate": 6.526055811918641e-06, + "loss": 0.5441, "step": 5908 }, { - "epoch": 0.62, - "grad_norm": 3.4587947376198205, - "learning_rate": 3.30530024232685e-06, - "loss": 0.6611, + "epoch": 0.42, + "grad_norm": 0.7028672509522572, + "learning_rate": 6.524961437504523e-06, + "loss": 0.4617, "step": 5909 }, { - "epoch": 0.62, - "grad_norm": 2.7167118186662447, - "learning_rate": 3.3036969798758486e-06, - "loss": 0.6919, + "epoch": 0.42, + "grad_norm": 1.738166921076007, + "learning_rate": 6.523866982536776e-06, + "loss": 0.5323, "step": 5910 }, { - "epoch": 0.62, - "grad_norm": 2.8540713326378624, - "learning_rate": 3.3020939144964298e-06, - "loss": 0.5524, + "epoch": 0.42, + "grad_norm": 1.5900624286194096, + "learning_rate": 6.52277244707321e-06, + "loss": 0.5222, "step": 5911 }, { - "epoch": 0.62, - "grad_norm": 2.633945852372088, - "learning_rate": 3.3004910463748315e-06, - "loss": 0.6379, + "epoch": 0.42, + "grad_norm": 0.8163872490810836, + "learning_rate": 6.521677831171646e-06, + "loss": 0.4267, "step": 5912 }, { - "epoch": 0.62, - "grad_norm": 2.6471695256154977, - "learning_rate": 3.298888375697269e-06, - "loss": 0.6573, + "epoch": 0.42, + "grad_norm": 1.8027939439904435, + "learning_rate": 6.5205831348898995e-06, + "loss": 0.5573, "step": 5913 }, { - "epoch": 0.62, - "grad_norm": 2.7889126114993785, - "learning_rate": 3.2972859026499395e-06, - "loss": 0.5756, + "epoch": 0.42, + "grad_norm": 1.6593700167849381, + "learning_rate": 6.519488358285801e-06, + "loss": 0.5363, "step": 5914 }, { - "epoch": 0.62, - "grad_norm": 2.4597559748985436, - "learning_rate": 3.2956836274190107e-06, - "loss": 0.6519, + "epoch": 0.42, + "grad_norm": 1.6383081589122526, + "learning_rate": 6.518393501417178e-06, + "loss": 0.5566, "step": 5915 }, { - "epoch": 0.62, - "grad_norm": 6.22590279694477, - "learning_rate": 3.294081550190633e-06, - "loss": 0.5535, + "epoch": 0.42, + "grad_norm": 1.8303875463301966, + "learning_rate": 6.517298564341867e-06, + "loss": 0.5522, "step": 5916 }, { - "epoch": 0.62, - "grad_norm": 3.1209221601644255, - "learning_rate": 3.292479671150927e-06, - "loss": 0.6076, + "epoch": 0.42, + "grad_norm": 1.7389136549573714, + "learning_rate": 6.5162035471177035e-06, + "loss": 0.5705, "step": 5917 }, { - "epoch": 0.62, - "grad_norm": 2.6511690459462853, - "learning_rate": 3.290877990485999e-06, - "loss": 0.5224, + "epoch": 0.42, + "grad_norm": 1.855754770762998, + "learning_rate": 6.51510844980253e-06, + "loss": 0.5392, "step": 5918 }, { - "epoch": 0.62, - "grad_norm": 1.017349317050653, - "learning_rate": 3.2892765083819233e-06, - "loss": 0.515, + "epoch": 0.42, + "grad_norm": 2.0480221567265353, + "learning_rate": 6.514013272454196e-06, + "loss": 0.5825, "step": 5919 }, { - "epoch": 0.62, - "grad_norm": 2.7093325400474417, - "learning_rate": 3.28767522502476e-06, - "loss": 0.6475, + "epoch": 0.42, + "grad_norm": 1.5080935725586666, + "learning_rate": 6.5129180151305495e-06, + "loss": 0.6111, "step": 5920 }, { - "epoch": 0.62, - "grad_norm": 2.471371183047004, - "learning_rate": 3.2860741406005383e-06, - "loss": 0.5238, + "epoch": 0.42, + "grad_norm": 2.293947495216481, + "learning_rate": 6.5118226778894465e-06, + "loss": 0.5984, "step": 5921 }, { - "epoch": 0.62, - "grad_norm": 2.6312257555514824, - "learning_rate": 3.2844732552952686e-06, - "loss": 0.6347, + "epoch": 0.42, + "grad_norm": 0.8635438813503999, + "learning_rate": 6.510727260788747e-06, + "loss": 0.4408, "step": 5922 }, { - "epoch": 0.62, - "grad_norm": 2.411758053136448, - "learning_rate": 3.282872569294936e-06, - "loss": 0.6182, + "epoch": 0.42, + "grad_norm": 1.645348246440866, + "learning_rate": 6.509631763886317e-06, + "loss": 0.5985, "step": 5923 }, { - "epoch": 0.62, - "grad_norm": 2.5478750822742935, - "learning_rate": 3.281272082785506e-06, - "loss": 0.481, + "epoch": 0.42, + "grad_norm": 1.5713177544585595, + "learning_rate": 6.50853618724002e-06, + "loss": 0.6187, "step": 5924 }, { - "epoch": 0.62, - "grad_norm": 2.3428946425529644, - "learning_rate": 3.2796717959529167e-06, - "loss": 0.6574, + "epoch": 0.42, + "grad_norm": 1.7817240764026094, + "learning_rate": 6.5074405309077305e-06, + "loss": 0.5466, "step": 5925 }, { - "epoch": 0.62, - "grad_norm": 2.5726580568876902, - "learning_rate": 3.2780717089830845e-06, - "loss": 0.6016, + "epoch": 0.42, + "grad_norm": 1.5919862255741748, + "learning_rate": 6.506344794947324e-06, + "loss": 0.5108, "step": 5926 }, { - "epoch": 0.62, - "grad_norm": 2.284631597557582, - "learning_rate": 3.276471822061904e-06, - "loss": 0.5906, + "epoch": 0.42, + "grad_norm": 2.512349478548671, + "learning_rate": 6.505248979416682e-06, + "loss": 0.5816, "step": 5927 }, { - "epoch": 0.62, - "grad_norm": 3.003461067354797, - "learning_rate": 3.2748721353752445e-06, - "loss": 0.683, + "epoch": 0.42, + "grad_norm": 1.5184983906760976, + "learning_rate": 6.504153084373688e-06, + "loss": 0.5013, "step": 5928 }, { - "epoch": 0.62, - "grad_norm": 2.570345614518788, - "learning_rate": 3.2732726491089562e-06, - "loss": 0.6233, + "epoch": 0.42, + "grad_norm": 1.5715874199727056, + "learning_rate": 6.50305710987623e-06, + "loss": 0.5045, "step": 5929 }, { - "epoch": 0.62, - "grad_norm": 2.7306210262884796, - "learning_rate": 3.2716733634488563e-06, - "loss": 0.6855, + "epoch": 0.42, + "grad_norm": 2.457520276923852, + "learning_rate": 6.501961055982202e-06, + "loss": 0.5315, "step": 5930 }, { - "epoch": 0.62, - "grad_norm": 2.4961398274059046, - "learning_rate": 3.2700742785807503e-06, - "loss": 0.6488, + "epoch": 0.42, + "grad_norm": 1.8022393562107695, + "learning_rate": 6.500864922749505e-06, + "loss": 0.5655, "step": 5931 }, { - "epoch": 0.62, - "grad_norm": 2.674949765424381, - "learning_rate": 3.2684753946904136e-06, - "loss": 0.5726, + "epoch": 0.42, + "grad_norm": 1.7217025605208927, + "learning_rate": 6.499768710236034e-06, + "loss": 0.6071, "step": 5932 }, { - "epoch": 0.62, - "grad_norm": 3.1722015237208283, - "learning_rate": 3.266876711963602e-06, - "loss": 0.5482, + "epoch": 0.42, + "grad_norm": 1.6193105576328, + "learning_rate": 6.4986724184996984e-06, + "loss": 0.5671, "step": 5933 }, { - "epoch": 0.62, - "grad_norm": 2.4532631137041836, - "learning_rate": 3.265278230586043e-06, - "loss": 0.6033, + "epoch": 0.42, + "grad_norm": 1.5333290148299747, + "learning_rate": 6.49757604759841e-06, + "loss": 0.5555, "step": 5934 }, { - "epoch": 0.62, - "grad_norm": 5.84912133996752, - "learning_rate": 3.2636799507434447e-06, - "loss": 0.6797, + "epoch": 0.42, + "grad_norm": 1.5497840864146137, + "learning_rate": 6.496479597590077e-06, + "loss": 0.6516, "step": 5935 }, { - "epoch": 0.62, - "grad_norm": 2.687307510260449, - "learning_rate": 3.2620818726214888e-06, - "loss": 0.6185, + "epoch": 0.42, + "grad_norm": 1.640125888594574, + "learning_rate": 6.4953830685326225e-06, + "loss": 0.5216, "step": 5936 }, { - "epoch": 0.62, - "grad_norm": 2.3580307891356673, - "learning_rate": 3.260483996405839e-06, - "loss": 0.6161, + "epoch": 0.42, + "grad_norm": 2.5640860438922024, + "learning_rate": 6.494286460483966e-06, + "loss": 0.5376, "step": 5937 }, { - "epoch": 0.62, - "grad_norm": 2.074986511593617, - "learning_rate": 3.2588863222821306e-06, - "loss": 0.6061, + "epoch": 0.42, + "grad_norm": 0.7908045999289842, + "learning_rate": 6.493189773502038e-06, + "loss": 0.4581, "step": 5938 }, { - "epoch": 0.62, - "grad_norm": 2.6795167053076145, - "learning_rate": 3.2572888504359743e-06, - "loss": 0.648, + "epoch": 0.42, + "grad_norm": 0.8532859801656744, + "learning_rate": 6.492093007644764e-06, + "loss": 0.4498, "step": 5939 }, { - "epoch": 0.63, - "grad_norm": 0.9485630736671, - "learning_rate": 3.2556915810529627e-06, - "loss": 0.5871, + "epoch": 0.42, + "grad_norm": 0.8497514367208415, + "learning_rate": 6.490996162970084e-06, + "loss": 0.4493, "step": 5940 }, { - "epoch": 0.63, - "grad_norm": 3.589804098570901, - "learning_rate": 3.25409451431866e-06, - "loss": 0.5836, + "epoch": 0.42, + "grad_norm": 1.6745980467393213, + "learning_rate": 6.489899239535932e-06, + "loss": 0.5157, "step": 5941 }, { - "epoch": 0.63, - "grad_norm": 2.029028547694551, - "learning_rate": 3.2524976504186125e-06, - "loss": 0.6208, + "epoch": 0.42, + "grad_norm": 1.5036305212824783, + "learning_rate": 6.488802237400254e-06, + "loss": 0.5738, "step": 5942 }, { - "epoch": 0.63, - "grad_norm": 2.457015002957502, - "learning_rate": 3.2509009895383337e-06, - "loss": 0.5133, + "epoch": 0.42, + "grad_norm": 0.8026634387789531, + "learning_rate": 6.487705156620998e-06, + "loss": 0.4301, "step": 5943 }, { - "epoch": 0.63, - "grad_norm": 2.7782124039908958, - "learning_rate": 3.249304531863323e-06, - "loss": 0.6363, + "epoch": 0.42, + "grad_norm": 1.4659958864159832, + "learning_rate": 6.486607997256114e-06, + "loss": 0.5472, "step": 5944 }, { - "epoch": 0.63, - "grad_norm": 2.7887314599627198, - "learning_rate": 3.247708277579049e-06, - "loss": 0.693, + "epoch": 0.42, + "grad_norm": 1.606843741757065, + "learning_rate": 6.485510759363558e-06, + "loss": 0.6044, "step": 5945 }, { - "epoch": 0.63, - "grad_norm": 5.059722447279897, - "learning_rate": 3.2461122268709657e-06, - "loss": 0.6958, + "epoch": 0.42, + "grad_norm": 1.6267398090021095, + "learning_rate": 6.484413443001292e-06, + "loss": 0.4863, "step": 5946 }, { - "epoch": 0.63, - "grad_norm": 3.609162372409086, - "learning_rate": 3.244516379924492e-06, - "loss": 0.5227, + "epoch": 0.42, + "grad_norm": 1.454224818949021, + "learning_rate": 6.483316048227275e-06, + "loss": 0.5441, "step": 5947 }, { - "epoch": 0.63, - "grad_norm": 2.5711727147391903, - "learning_rate": 3.242920736925029e-06, - "loss": 0.5873, + "epoch": 0.42, + "grad_norm": 1.6579565543114174, + "learning_rate": 6.482218575099481e-06, + "loss": 0.5053, "step": 5948 }, { - "epoch": 0.63, - "grad_norm": 2.965512354865726, - "learning_rate": 3.2413252980579572e-06, - "loss": 0.5924, + "epoch": 0.42, + "grad_norm": 0.7525875137360138, + "learning_rate": 6.481121023675878e-06, + "loss": 0.4485, "step": 5949 }, { - "epoch": 0.63, - "grad_norm": 4.480684034499855, - "learning_rate": 3.239730063508629e-06, - "loss": 0.6539, + "epoch": 0.42, + "grad_norm": 1.7427633200258947, + "learning_rate": 6.480023394014443e-06, + "loss": 0.5541, "step": 5950 }, { - "epoch": 0.63, - "grad_norm": 3.0554230163758693, - "learning_rate": 3.238135033462374e-06, - "loss": 0.6008, + "epoch": 0.42, + "grad_norm": 1.4199231134223715, + "learning_rate": 6.478925686173158e-06, + "loss": 0.5146, "step": 5951 }, { - "epoch": 0.63, - "grad_norm": 2.264871265257945, - "learning_rate": 3.2365402081044955e-06, - "loss": 0.7099, + "epoch": 0.42, + "grad_norm": 1.508635767035549, + "learning_rate": 6.4778279002100075e-06, + "loss": 0.5548, "step": 5952 }, { - "epoch": 0.63, - "grad_norm": 2.4974903193187594, - "learning_rate": 3.2349455876202797e-06, - "loss": 0.6184, + "epoch": 0.42, + "grad_norm": 1.6757180312735853, + "learning_rate": 6.4767300361829814e-06, + "loss": 0.4886, "step": 5953 }, { - "epoch": 0.63, - "grad_norm": 2.18292964645188, - "learning_rate": 3.2333511721949816e-06, - "loss": 0.576, + "epoch": 0.42, + "grad_norm": 1.5588845703950929, + "learning_rate": 6.475632094150068e-06, + "loss": 0.5178, "step": 5954 }, { - "epoch": 0.63, - "grad_norm": 2.187323524231644, - "learning_rate": 3.23175696201384e-06, - "loss": 0.6029, + "epoch": 0.42, + "grad_norm": 2.516705531946218, + "learning_rate": 6.474534074169269e-06, + "loss": 0.5253, "step": 5955 }, { - "epoch": 0.63, - "grad_norm": 2.42051305833246, - "learning_rate": 3.230162957262062e-06, - "loss": 0.5949, + "epoch": 0.42, + "grad_norm": 1.6518195383974432, + "learning_rate": 6.473435976298583e-06, + "loss": 0.5285, "step": 5956 }, { - "epoch": 0.63, - "grad_norm": 2.437281602803996, - "learning_rate": 3.2285691581248345e-06, - "loss": 0.6308, + "epoch": 0.42, + "grad_norm": 0.7626824341594294, + "learning_rate": 6.472337800596017e-06, + "loss": 0.4354, "step": 5957 }, { - "epoch": 0.63, - "grad_norm": 2.188705793501174, - "learning_rate": 3.226975564787322e-06, - "loss": 0.6577, + "epoch": 0.42, + "grad_norm": 1.624042820353851, + "learning_rate": 6.471239547119577e-06, + "loss": 0.594, "step": 5958 }, { - "epoch": 0.63, - "grad_norm": 2.095497000196284, - "learning_rate": 3.2253821774346644e-06, - "loss": 0.613, + "epoch": 0.42, + "grad_norm": 1.4637859970149665, + "learning_rate": 6.470141215927281e-06, + "loss": 0.5148, "step": 5959 }, { - "epoch": 0.63, - "grad_norm": 2.217153208860546, - "learning_rate": 3.2237889962519748e-06, - "loss": 0.5494, + "epoch": 0.42, + "grad_norm": 0.7511731471691121, + "learning_rate": 6.469042807077144e-06, + "loss": 0.4655, "step": 5960 }, { - "epoch": 0.63, - "grad_norm": 3.0997362892532534, - "learning_rate": 3.2221960214243437e-06, - "loss": 0.6854, + "epoch": 0.42, + "grad_norm": 1.4096498393069241, + "learning_rate": 6.467944320627188e-06, + "loss": 0.4452, "step": 5961 }, { - "epoch": 0.63, - "grad_norm": 2.176771504376497, - "learning_rate": 3.2206032531368407e-06, - "loss": 0.625, + "epoch": 0.42, + "grad_norm": 1.4871527081230382, + "learning_rate": 6.466845756635439e-06, + "loss": 0.5684, "step": 5962 }, { - "epoch": 0.63, - "grad_norm": 3.3368855193919957, - "learning_rate": 3.2190106915745077e-06, - "loss": 0.6591, + "epoch": 0.42, + "grad_norm": 2.2584959584341315, + "learning_rate": 6.465747115159927e-06, + "loss": 0.6267, "step": 5963 }, { - "epoch": 0.63, - "grad_norm": 2.221750102788297, - "learning_rate": 3.2174183369223667e-06, - "loss": 0.5971, + "epoch": 0.42, + "grad_norm": 2.035223220833597, + "learning_rate": 6.464648396258686e-06, + "loss": 0.5675, "step": 5964 }, { - "epoch": 0.63, - "grad_norm": 2.3884878579108473, - "learning_rate": 3.2158261893654073e-06, - "loss": 0.7223, + "epoch": 0.42, + "grad_norm": 1.8035887043566976, + "learning_rate": 6.463549599989752e-06, + "loss": 0.5917, "step": 5965 }, { - "epoch": 0.63, - "grad_norm": 2.8667030280760954, - "learning_rate": 3.214234249088605e-06, - "loss": 0.6087, + "epoch": 0.42, + "grad_norm": 1.5035927735922108, + "learning_rate": 6.46245072641117e-06, + "loss": 0.5117, "step": 5966 }, { - "epoch": 0.63, - "grad_norm": 2.172551788871772, - "learning_rate": 3.212642516276905e-06, - "loss": 0.5243, + "epoch": 0.42, + "grad_norm": 1.600201071877732, + "learning_rate": 6.461351775580986e-06, + "loss": 0.6081, "step": 5967 }, { - "epoch": 0.63, - "grad_norm": 2.7291377150296534, - "learning_rate": 3.2110509911152315e-06, - "loss": 0.5997, + "epoch": 0.42, + "grad_norm": 1.52042786557807, + "learning_rate": 6.46025274755725e-06, + "loss": 0.5214, "step": 5968 }, { - "epoch": 0.63, - "grad_norm": 1.9384746293091961, - "learning_rate": 3.209459673788482e-06, - "loss": 0.5464, + "epoch": 0.42, + "grad_norm": 1.8002064114917573, + "learning_rate": 6.459153642398014e-06, + "loss": 0.5806, "step": 5969 }, { - "epoch": 0.63, - "grad_norm": 2.058472029705662, - "learning_rate": 3.207868564481532e-06, - "loss": 0.5822, + "epoch": 0.42, + "grad_norm": 1.863842937185797, + "learning_rate": 6.458054460161339e-06, + "loss": 0.5093, "step": 5970 }, { - "epoch": 0.63, - "grad_norm": 2.567086993673489, - "learning_rate": 3.2062776633792303e-06, - "loss": 0.6797, + "epoch": 0.42, + "grad_norm": 1.7742606767883655, + "learning_rate": 6.456955200905287e-06, + "loss": 0.5648, "step": 5971 }, { - "epoch": 0.63, - "grad_norm": 3.1555293622681844, - "learning_rate": 3.2046869706664074e-06, - "loss": 0.5843, + "epoch": 0.42, + "grad_norm": 2.037778014924978, + "learning_rate": 6.455855864687924e-06, + "loss": 0.5612, "step": 5972 }, { - "epoch": 0.63, - "grad_norm": 2.3596960035354035, - "learning_rate": 3.2030964865278604e-06, - "loss": 0.5639, + "epoch": 0.42, + "grad_norm": 1.8151945670938043, + "learning_rate": 6.454756451567321e-06, + "loss": 0.5197, "step": 5973 }, { - "epoch": 0.63, - "grad_norm": 2.0640908869935912, - "learning_rate": 3.2015062111483688e-06, - "loss": 0.6841, + "epoch": 0.42, + "grad_norm": 0.7798079708825238, + "learning_rate": 6.453656961601554e-06, + "loss": 0.4347, "step": 5974 }, { - "epoch": 0.63, - "grad_norm": 2.689198480543773, - "learning_rate": 3.199916144712688e-06, - "loss": 0.6605, + "epoch": 0.42, + "grad_norm": 1.9594605050070328, + "learning_rate": 6.4525573948487e-06, + "loss": 0.6037, "step": 5975 }, { - "epoch": 0.63, - "grad_norm": 2.6877494467316985, - "learning_rate": 3.1983262874055442e-06, - "loss": 0.5771, + "epoch": 0.42, + "grad_norm": 2.4363698310556376, + "learning_rate": 6.451457751366843e-06, + "loss": 0.5556, "step": 5976 }, { - "epoch": 0.63, - "grad_norm": 2.772844595362914, - "learning_rate": 3.1967366394116477e-06, - "loss": 0.6137, + "epoch": 0.42, + "grad_norm": 1.5437835770149808, + "learning_rate": 6.450358031214069e-06, + "loss": 0.5531, "step": 5977 }, { - "epoch": 0.63, - "grad_norm": 1.0203362646742848, - "learning_rate": 3.1951472009156725e-06, - "loss": 0.5498, + "epoch": 0.42, + "grad_norm": 1.5400375017935015, + "learning_rate": 6.44925823444847e-06, + "loss": 0.5449, "step": 5978 }, { - "epoch": 0.63, - "grad_norm": 2.242251234684735, - "learning_rate": 3.193557972102279e-06, - "loss": 0.6582, + "epoch": 0.42, + "grad_norm": 2.1801814565703608, + "learning_rate": 6.448158361128139e-06, + "loss": 0.5186, "step": 5979 }, { - "epoch": 0.63, - "grad_norm": 3.8382421017907684, - "learning_rate": 3.191968953156098e-06, - "loss": 0.6473, + "epoch": 0.42, + "grad_norm": 1.6283196205715558, + "learning_rate": 6.447058411311176e-06, + "loss": 0.5737, "step": 5980 }, { - "epoch": 0.63, - "grad_norm": 2.1209413221002222, - "learning_rate": 3.19038014426174e-06, - "loss": 0.6004, + "epoch": 0.42, + "grad_norm": 1.6577426403853723, + "learning_rate": 6.445958385055685e-06, + "loss": 0.5764, "step": 5981 }, { - "epoch": 0.63, - "grad_norm": 2.2255883161813963, - "learning_rate": 3.1887915456037833e-06, - "loss": 0.6218, + "epoch": 0.42, + "grad_norm": 1.9572003437188972, + "learning_rate": 6.4448582824197726e-06, + "loss": 0.6231, "step": 5982 }, { - "epoch": 0.63, - "grad_norm": 2.8783014772228532, - "learning_rate": 3.1872031573667895e-06, - "loss": 0.7142, + "epoch": 0.42, + "grad_norm": 2.4984052705815953, + "learning_rate": 6.44375810346155e-06, + "loss": 0.5428, "step": 5983 }, { - "epoch": 0.63, - "grad_norm": 2.5531410132078363, - "learning_rate": 3.185614979735293e-06, - "loss": 0.5936, + "epoch": 0.42, + "grad_norm": 1.6047753135809402, + "learning_rate": 6.442657848239132e-06, + "loss": 0.5813, "step": 5984 }, { - "epoch": 0.63, - "grad_norm": 2.565229169837344, - "learning_rate": 3.1840270128938032e-06, - "loss": 0.6143, + "epoch": 0.42, + "grad_norm": 1.7338255396487672, + "learning_rate": 6.441557516810639e-06, + "loss": 0.574, "step": 5985 }, { - "epoch": 0.63, - "grad_norm": 2.045126577655692, - "learning_rate": 3.182439257026807e-06, - "loss": 0.5457, + "epoch": 0.42, + "grad_norm": 1.8992292879482646, + "learning_rate": 6.4404571092341925e-06, + "loss": 0.5254, "step": 5986 }, { - "epoch": 0.63, - "grad_norm": 2.0857258123928033, - "learning_rate": 3.180851712318761e-06, - "loss": 0.6753, + "epoch": 0.42, + "grad_norm": 1.5875054890267597, + "learning_rate": 6.4393566255679206e-06, + "loss": 0.475, "step": 5987 }, { - "epoch": 0.63, - "grad_norm": 2.631692022599449, - "learning_rate": 3.179264378954106e-06, - "loss": 0.5913, + "epoch": 0.42, + "grad_norm": 2.0799155168446557, + "learning_rate": 6.4382560658699545e-06, + "loss": 0.5293, "step": 5988 }, { - "epoch": 0.63, - "grad_norm": 3.8525652346655925, - "learning_rate": 3.1776772571172514e-06, - "loss": 0.7053, + "epoch": 0.42, + "grad_norm": 0.8344387982173572, + "learning_rate": 6.43715543019843e-06, + "loss": 0.4564, "step": 5989 }, { - "epoch": 0.63, - "grad_norm": 2.2544931285363856, - "learning_rate": 3.1760903469925874e-06, - "loss": 0.6392, + "epoch": 0.43, + "grad_norm": 0.796720829663042, + "learning_rate": 6.436054718611485e-06, + "loss": 0.4631, "step": 5990 }, { - "epoch": 0.63, - "grad_norm": 2.2143334987948213, - "learning_rate": 3.174503648764473e-06, - "loss": 0.6196, + "epoch": 0.43, + "grad_norm": 1.8938441992952706, + "learning_rate": 6.4349539311672645e-06, + "loss": 0.5094, "step": 5991 }, { - "epoch": 0.63, - "grad_norm": 2.7607820463483015, - "learning_rate": 3.1729171626172485e-06, - "loss": 0.6129, + "epoch": 0.43, + "grad_norm": 1.9258956375330782, + "learning_rate": 6.433853067923915e-06, + "loss": 0.6326, "step": 5992 }, { - "epoch": 0.63, - "grad_norm": 2.431241441047194, - "learning_rate": 3.1713308887352244e-06, - "loss": 0.5663, + "epoch": 0.43, + "grad_norm": 1.7019214240135452, + "learning_rate": 6.432752128939589e-06, + "loss": 0.5177, "step": 5993 }, { - "epoch": 0.63, - "grad_norm": 0.9957317360641763, - "learning_rate": 3.1697448273026944e-06, - "loss": 0.5867, + "epoch": 0.43, + "grad_norm": 1.6032057522097785, + "learning_rate": 6.431651114272439e-06, + "loss": 0.5331, "step": 5994 }, { - "epoch": 0.63, - "grad_norm": 4.718815023192582, - "learning_rate": 3.1681589785039178e-06, - "loss": 0.5582, + "epoch": 0.43, + "grad_norm": 1.4637203150717077, + "learning_rate": 6.4305500239806265e-06, + "loss": 0.5025, "step": 5995 }, { - "epoch": 0.63, - "grad_norm": 2.3173917102419392, - "learning_rate": 3.1665733425231356e-06, - "loss": 0.5831, + "epoch": 0.43, + "grad_norm": 1.7745714885467025, + "learning_rate": 6.429448858122314e-06, + "loss": 0.5626, "step": 5996 }, { - "epoch": 0.63, - "grad_norm": 2.9747101713599116, - "learning_rate": 3.164987919544563e-06, - "loss": 0.6145, + "epoch": 0.43, + "grad_norm": 1.6216255553716963, + "learning_rate": 6.428347616755668e-06, + "loss": 0.6221, "step": 5997 }, { - "epoch": 0.63, - "grad_norm": 3.0576887431965263, - "learning_rate": 3.163402709752389e-06, - "loss": 0.6303, + "epoch": 0.43, + "grad_norm": 0.8837107220867592, + "learning_rate": 6.4272462999388644e-06, + "loss": 0.4428, "step": 5998 }, { - "epoch": 0.63, - "grad_norm": 2.2892471503762697, - "learning_rate": 3.1618177133307813e-06, - "loss": 0.5384, + "epoch": 0.43, + "grad_norm": 1.7605128042352531, + "learning_rate": 6.426144907730072e-06, + "loss": 0.5209, "step": 5999 }, { - "epoch": 0.63, - "grad_norm": 2.0697756117981876, - "learning_rate": 3.1602329304638755e-06, - "loss": 0.6807, + "epoch": 0.43, + "grad_norm": 1.622835734973294, + "learning_rate": 6.425043440187475e-06, + "loss": 0.5821, "step": 6000 }, { - "epoch": 0.63, - "grad_norm": 2.1548228132567897, - "learning_rate": 3.158648361335791e-06, - "loss": 0.6769, + "epoch": 0.43, + "grad_norm": 1.8626463694606845, + "learning_rate": 6.423941897369255e-06, + "loss": 0.5855, "step": 6001 }, { - "epoch": 0.63, - "grad_norm": 2.171892545453622, - "learning_rate": 3.1570640061306153e-06, - "loss": 0.5813, + "epoch": 0.43, + "grad_norm": 1.9012019592984086, + "learning_rate": 6.422840279333598e-06, + "loss": 0.5107, "step": 6002 }, { - "epoch": 0.63, - "grad_norm": 3.5558082679769543, - "learning_rate": 3.1554798650324183e-06, - "loss": 0.5725, + "epoch": 0.43, + "grad_norm": 1.568800336255233, + "learning_rate": 6.421738586138695e-06, + "loss": 0.4794, "step": 6003 }, { - "epoch": 0.63, - "grad_norm": 2.1783862920998778, - "learning_rate": 3.1538959382252378e-06, - "loss": 0.6095, + "epoch": 0.43, + "grad_norm": 2.9023818605059257, + "learning_rate": 6.4206368178427444e-06, + "loss": 0.5823, "step": 6004 }, { - "epoch": 0.63, - "grad_norm": 2.439670545080028, - "learning_rate": 3.1523122258930904e-06, - "loss": 0.6402, + "epoch": 0.43, + "grad_norm": 1.6515305827598143, + "learning_rate": 6.419534974503942e-06, + "loss": 0.5561, "step": 6005 }, { - "epoch": 0.63, - "grad_norm": 2.1135984445220846, - "learning_rate": 3.150728728219966e-06, - "loss": 0.5736, + "epoch": 0.43, + "grad_norm": 1.5439018601656729, + "learning_rate": 6.418433056180493e-06, + "loss": 0.5543, "step": 6006 }, { - "epoch": 0.63, - "grad_norm": 2.2613893512798544, - "learning_rate": 3.149145445389835e-06, - "loss": 0.6038, + "epoch": 0.43, + "grad_norm": 1.7666254171551659, + "learning_rate": 6.417331062930604e-06, + "loss": 0.5788, "step": 6007 }, { - "epoch": 0.63, - "grad_norm": 2.7430614334073287, - "learning_rate": 3.147562377586635e-06, - "loss": 0.6266, + "epoch": 0.43, + "grad_norm": 1.5302926039016347, + "learning_rate": 6.4162289948124855e-06, + "loss": 0.6269, "step": 6008 }, { - "epoch": 0.63, - "grad_norm": 2.8377286942815885, - "learning_rate": 3.1459795249942815e-06, - "loss": 0.6455, + "epoch": 0.43, + "grad_norm": 1.6372357398534825, + "learning_rate": 6.415126851884352e-06, + "loss": 0.4776, "step": 6009 }, { - "epoch": 0.63, - "grad_norm": 0.9591836036301431, - "learning_rate": 3.144396887796669e-06, - "loss": 0.5318, + "epoch": 0.43, + "grad_norm": 1.6298204777041971, + "learning_rate": 6.414024634204423e-06, + "loss": 0.512, "step": 6010 }, { - "epoch": 0.63, - "grad_norm": 2.0086517588320363, - "learning_rate": 3.1428144661776605e-06, - "loss": 0.5734, + "epoch": 0.43, + "grad_norm": 1.615374376257846, + "learning_rate": 6.412922341830922e-06, + "loss": 0.5301, "step": 6011 }, { - "epoch": 0.63, - "grad_norm": 2.218164590981601, - "learning_rate": 3.141232260321102e-06, - "loss": 0.683, + "epoch": 0.43, + "grad_norm": 1.667507907030113, + "learning_rate": 6.411819974822075e-06, + "loss": 0.4957, "step": 6012 }, { - "epoch": 0.63, - "grad_norm": 2.503329798201055, - "learning_rate": 3.1396502704108034e-06, - "loss": 0.6242, + "epoch": 0.43, + "grad_norm": 1.711810953759433, + "learning_rate": 6.410717533236114e-06, + "loss": 0.6315, "step": 6013 }, { - "epoch": 0.63, - "grad_norm": 3.221827577676699, - "learning_rate": 3.13806849663056e-06, - "loss": 0.5933, + "epoch": 0.43, + "grad_norm": 1.6140636141920406, + "learning_rate": 6.409615017131271e-06, + "loss": 0.485, "step": 6014 }, { - "epoch": 0.63, - "grad_norm": 3.6189027947412518, - "learning_rate": 3.1364869391641343e-06, - "loss": 0.5451, + "epoch": 0.43, + "grad_norm": 1.6473609664269142, + "learning_rate": 6.408512426565788e-06, + "loss": 0.5247, "step": 6015 }, { - "epoch": 0.63, - "grad_norm": 2.2116203062161435, - "learning_rate": 3.1349055981952725e-06, - "loss": 0.5863, + "epoch": 0.43, + "grad_norm": 1.6413392568841374, + "learning_rate": 6.4074097615979045e-06, + "loss": 0.5521, "step": 6016 }, { - "epoch": 0.63, - "grad_norm": 2.4603820875117735, - "learning_rate": 3.133324473907685e-06, - "loss": 0.6867, + "epoch": 0.43, + "grad_norm": 1.6822529402881832, + "learning_rate": 6.406307022285869e-06, + "loss": 0.5639, "step": 6017 }, { - "epoch": 0.63, - "grad_norm": 4.688134030426931, - "learning_rate": 3.1317435664850626e-06, - "loss": 0.6107, + "epoch": 0.43, + "grad_norm": 3.9468393363453824, + "learning_rate": 6.4052042086879315e-06, + "loss": 0.569, "step": 6018 }, { - "epoch": 0.63, - "grad_norm": 2.2474502089494233, - "learning_rate": 3.130162876111074e-06, - "loss": 0.665, + "epoch": 0.43, + "grad_norm": 1.6979536903856498, + "learning_rate": 6.404101320862347e-06, + "loss": 0.5188, "step": 6019 }, { - "epoch": 0.63, - "grad_norm": 2.235530948111977, - "learning_rate": 3.128582402969358e-06, - "loss": 0.6928, + "epoch": 0.43, + "grad_norm": 1.512470851577651, + "learning_rate": 6.402998358867372e-06, + "loss": 0.52, "step": 6020 }, { - "epoch": 0.63, - "grad_norm": 2.5821548582113008, - "learning_rate": 3.1270021472435276e-06, - "loss": 0.6203, + "epoch": 0.43, + "grad_norm": 1.8475737007233368, + "learning_rate": 6.40189532276127e-06, + "loss": 0.565, "step": 6021 }, { - "epoch": 0.63, - "grad_norm": 2.637774136880771, - "learning_rate": 3.125422109117173e-06, - "loss": 0.66, + "epoch": 0.43, + "grad_norm": 1.6463262615096825, + "learning_rate": 6.400792212602307e-06, + "loss": 0.5542, "step": 6022 }, { - "epoch": 0.63, - "grad_norm": 2.1103955615847716, - "learning_rate": 3.1238422887738596e-06, - "loss": 0.5965, + "epoch": 0.43, + "grad_norm": 0.8123712430084031, + "learning_rate": 6.3996890284487525e-06, + "loss": 0.4516, "step": 6023 }, { - "epoch": 0.63, - "grad_norm": 2.2341447165344492, - "learning_rate": 3.122262686397124e-06, - "loss": 0.741, + "epoch": 0.43, + "grad_norm": 1.5442401424985055, + "learning_rate": 6.398585770358879e-06, + "loss": 0.5287, "step": 6024 }, { - "epoch": 0.63, - "grad_norm": 2.2231015638638967, - "learning_rate": 3.1206833021704843e-06, - "loss": 0.6181, + "epoch": 0.43, + "grad_norm": 1.691081570547479, + "learning_rate": 6.3974824383909694e-06, + "loss": 0.6219, "step": 6025 }, { - "epoch": 0.63, - "grad_norm": 2.8157201183565475, - "learning_rate": 3.1191041362774246e-06, - "loss": 0.688, + "epoch": 0.43, + "grad_norm": 1.5194993733210718, + "learning_rate": 6.3963790326033e-06, + "loss": 0.4854, "step": 6026 }, { - "epoch": 0.63, - "grad_norm": 2.6112171825573403, - "learning_rate": 3.117525188901409e-06, - "loss": 0.6337, + "epoch": 0.43, + "grad_norm": 1.7719891791811102, + "learning_rate": 6.395275553054159e-06, + "loss": 0.5187, "step": 6027 }, { - "epoch": 0.63, - "grad_norm": 2.591086899022109, - "learning_rate": 3.115946460225875e-06, - "loss": 0.6666, + "epoch": 0.43, + "grad_norm": 1.5484178374074236, + "learning_rate": 6.394171999801835e-06, + "loss": 0.5929, "step": 6028 }, { - "epoch": 0.63, - "grad_norm": 1.990144411097681, - "learning_rate": 3.1143679504342367e-06, - "loss": 0.607, + "epoch": 0.43, + "grad_norm": 1.9746152087259201, + "learning_rate": 6.393068372904623e-06, + "loss": 0.5316, "step": 6029 }, { - "epoch": 0.63, - "grad_norm": 1.097695113019066, - "learning_rate": 3.1127896597098784e-06, - "loss": 0.5598, + "epoch": 0.43, + "grad_norm": 1.6275468756754354, + "learning_rate": 6.391964672420818e-06, + "loss": 0.5647, "step": 6030 }, { - "epoch": 0.63, - "grad_norm": 4.043305343302304, - "learning_rate": 3.1112115882361605e-06, - "loss": 0.6536, + "epoch": 0.43, + "grad_norm": 1.849935197927109, + "learning_rate": 6.3908608984087225e-06, + "loss": 0.5343, "step": 6031 }, { - "epoch": 0.63, - "grad_norm": 2.1224861128520143, - "learning_rate": 3.1096337361964213e-06, - "loss": 0.6197, + "epoch": 0.43, + "grad_norm": 2.339010604118414, + "learning_rate": 6.389757050926641e-06, + "loss": 0.5259, "step": 6032 }, { - "epoch": 0.63, - "grad_norm": 2.2716747161287523, - "learning_rate": 3.108056103773972e-06, - "loss": 0.6368, + "epoch": 0.43, + "grad_norm": 1.537707850968111, + "learning_rate": 6.388653130032885e-06, + "loss": 0.5304, "step": 6033 }, { - "epoch": 0.63, - "grad_norm": 2.3153801155163998, - "learning_rate": 3.106478691152094e-06, - "loss": 0.6325, + "epoch": 0.43, + "grad_norm": 1.6251019891427079, + "learning_rate": 6.387549135785765e-06, + "loss": 0.552, "step": 6034 }, { - "epoch": 0.64, - "grad_norm": 2.5772248457210116, - "learning_rate": 3.1049014985140468e-06, - "loss": 0.6982, + "epoch": 0.43, + "grad_norm": 1.8893317186668974, + "learning_rate": 6.386445068243596e-06, + "loss": 0.5396, "step": 6035 }, { - "epoch": 0.64, - "grad_norm": 3.92355644648877, - "learning_rate": 3.103324526043066e-06, - "loss": 0.6235, + "epoch": 0.43, + "grad_norm": 1.501769650190139, + "learning_rate": 6.385340927464701e-06, + "loss": 0.5304, "step": 6036 }, { - "epoch": 0.64, - "grad_norm": 2.394493973433464, - "learning_rate": 3.101747773922359e-06, - "loss": 0.5862, + "epoch": 0.43, + "grad_norm": 1.754153773265779, + "learning_rate": 6.384236713507404e-06, + "loss": 0.5885, "step": 6037 }, { - "epoch": 0.64, - "grad_norm": 2.840825066417566, - "learning_rate": 3.100171242335109e-06, - "loss": 0.6773, + "epoch": 0.43, + "grad_norm": 1.5234461500832417, + "learning_rate": 6.383132426430034e-06, + "loss": 0.5496, "step": 6038 }, { - "epoch": 0.64, - "grad_norm": 2.3142902400992083, - "learning_rate": 3.0985949314644724e-06, - "loss": 0.6347, + "epoch": 0.43, + "grad_norm": 1.8244736229165082, + "learning_rate": 6.382028066290923e-06, + "loss": 0.5681, "step": 6039 }, { - "epoch": 0.64, - "grad_norm": 3.1693698090325952, - "learning_rate": 3.09701884149358e-06, - "loss": 0.632, + "epoch": 0.43, + "grad_norm": 1.6667521962894258, + "learning_rate": 6.380923633148406e-06, + "loss": 0.4865, "step": 6040 }, { - "epoch": 0.64, - "grad_norm": 2.174190785161442, - "learning_rate": 3.0954429726055367e-06, - "loss": 0.5664, + "epoch": 0.43, + "grad_norm": 2.096830370144309, + "learning_rate": 6.379819127060824e-06, + "loss": 0.5722, "step": 6041 }, { - "epoch": 0.64, - "grad_norm": 2.427368074809071, - "learning_rate": 3.093867324983425e-06, - "loss": 0.6002, + "epoch": 0.43, + "grad_norm": 2.5334943182876573, + "learning_rate": 6.37871454808652e-06, + "loss": 0.5752, "step": 6042 }, { - "epoch": 0.64, - "grad_norm": 0.98991358532084, - "learning_rate": 3.0922918988102968e-06, - "loss": 0.5889, + "epoch": 0.43, + "grad_norm": 1.721926322244316, + "learning_rate": 6.377609896283842e-06, + "loss": 0.5439, "step": 6043 }, { - "epoch": 0.64, - "grad_norm": 3.16305476049039, - "learning_rate": 3.0907166942691804e-06, - "loss": 0.5656, + "epoch": 0.43, + "grad_norm": 1.5420627136404597, + "learning_rate": 6.376505171711142e-06, + "loss": 0.59, "step": 6044 }, { - "epoch": 0.64, - "grad_norm": 2.3426174204082426, - "learning_rate": 3.0891417115430794e-06, - "loss": 0.5778, + "epoch": 0.43, + "grad_norm": 3.0660598311639706, + "learning_rate": 6.375400374426772e-06, + "loss": 0.5059, "step": 6045 }, { - "epoch": 0.64, - "grad_norm": 2.695431267278061, - "learning_rate": 3.08756695081497e-06, - "loss": 0.662, + "epoch": 0.43, + "grad_norm": 1.7285186140305258, + "learning_rate": 6.374295504489095e-06, + "loss": 0.5649, "step": 6046 }, { - "epoch": 0.64, - "grad_norm": 2.4120245938951017, - "learning_rate": 3.085992412267807e-06, - "loss": 0.5995, + "epoch": 0.43, + "grad_norm": 0.6930666236267564, + "learning_rate": 6.373190561956472e-06, + "loss": 0.4376, "step": 6047 }, { - "epoch": 0.64, - "grad_norm": 2.0993135251738906, - "learning_rate": 3.08441809608451e-06, - "loss": 0.6046, + "epoch": 0.43, + "grad_norm": 1.4829460529081877, + "learning_rate": 6.372085546887272e-06, + "loss": 0.5277, "step": 6048 }, { - "epoch": 0.64, - "grad_norm": 2.120694022813388, - "learning_rate": 3.0828440024479823e-06, - "loss": 0.597, + "epoch": 0.43, + "grad_norm": 1.8159403140375654, + "learning_rate": 6.370980459339865e-06, + "loss": 0.5666, "step": 6049 }, { - "epoch": 0.64, - "grad_norm": 2.164550468698215, - "learning_rate": 3.081270131541094e-06, - "loss": 0.6243, + "epoch": 0.43, + "grad_norm": 3.2507604358748505, + "learning_rate": 6.369875299372623e-06, + "loss": 0.5356, "step": 6050 }, { - "epoch": 0.64, - "grad_norm": 2.499720888315672, - "learning_rate": 3.079696483546699e-06, - "loss": 0.5662, + "epoch": 0.43, + "grad_norm": 1.6328435461626802, + "learning_rate": 6.368770067043927e-06, + "loss": 0.5627, "step": 6051 }, { - "epoch": 0.64, - "grad_norm": 2.19241543023071, - "learning_rate": 3.078123058647614e-06, - "loss": 0.645, + "epoch": 0.43, + "grad_norm": 0.8664752452428686, + "learning_rate": 6.367664762412157e-06, + "loss": 0.4515, "step": 6052 }, { - "epoch": 0.64, - "grad_norm": 2.1759008939791316, - "learning_rate": 3.0765498570266354e-06, - "loss": 0.5636, + "epoch": 0.43, + "grad_norm": 1.5873065021394948, + "learning_rate": 6.366559385535699e-06, + "loss": 0.569, "step": 6053 }, { - "epoch": 0.64, - "grad_norm": 2.481766219102531, - "learning_rate": 3.074976878866536e-06, - "loss": 0.5773, + "epoch": 0.43, + "grad_norm": 1.6050920565344173, + "learning_rate": 6.365453936472945e-06, + "loss": 0.4822, "step": 6054 }, { - "epoch": 0.64, - "grad_norm": 2.6119108357768677, - "learning_rate": 3.0734041243500578e-06, - "loss": 0.6492, + "epoch": 0.43, + "grad_norm": 2.1566277832956833, + "learning_rate": 6.3643484152822875e-06, + "loss": 0.5252, "step": 6055 }, { - "epoch": 0.64, - "grad_norm": 2.1005618832159336, - "learning_rate": 3.0718315936599184e-06, - "loss": 0.6326, + "epoch": 0.43, + "grad_norm": 1.6310431103553487, + "learning_rate": 6.363242822022123e-06, + "loss": 0.5566, "step": 6056 }, { - "epoch": 0.64, - "grad_norm": 2.1970627941200704, - "learning_rate": 3.0702592869788105e-06, - "loss": 0.5606, + "epoch": 0.43, + "grad_norm": 1.667473201511612, + "learning_rate": 6.362137156750855e-06, + "loss": 0.6133, "step": 6057 }, { - "epoch": 0.64, - "grad_norm": 1.0059803541239813, - "learning_rate": 3.0686872044894014e-06, - "loss": 0.5722, + "epoch": 0.43, + "grad_norm": 0.7424331248176058, + "learning_rate": 6.361031419526885e-06, + "loss": 0.4492, "step": 6058 }, { - "epoch": 0.64, - "grad_norm": 3.2015112152252043, - "learning_rate": 3.0671153463743282e-06, - "loss": 0.641, + "epoch": 0.43, + "grad_norm": 1.4496912626604237, + "learning_rate": 6.359925610408625e-06, + "loss": 0.5402, "step": 6059 }, { - "epoch": 0.64, - "grad_norm": 1.0191280750132719, - "learning_rate": 3.0655437128162093e-06, - "loss": 0.5936, + "epoch": 0.43, + "grad_norm": 0.7774191132663957, + "learning_rate": 6.358819729454485e-06, + "loss": 0.447, "step": 6060 }, { - "epoch": 0.64, - "grad_norm": 6.045650809744414, - "learning_rate": 3.0639723039976284e-06, - "loss": 0.6325, + "epoch": 0.43, + "grad_norm": 1.5696001680809417, + "learning_rate": 6.357713776722883e-06, + "loss": 0.5554, "step": 6061 }, { - "epoch": 0.64, - "grad_norm": 2.2246240058605222, - "learning_rate": 3.062401120101149e-06, - "loss": 0.6262, + "epoch": 0.43, + "grad_norm": 1.6309799508288485, + "learning_rate": 6.356607752272238e-06, + "loss": 0.535, "step": 6062 }, { - "epoch": 0.64, - "grad_norm": 3.146655954063514, - "learning_rate": 3.060830161309305e-06, - "loss": 0.699, + "epoch": 0.43, + "grad_norm": 1.5409032780476837, + "learning_rate": 6.355501656160975e-06, + "loss": 0.5914, "step": 6063 }, { - "epoch": 0.64, - "grad_norm": 2.0936615072845663, - "learning_rate": 3.05925942780461e-06, - "loss": 0.5818, + "epoch": 0.43, + "grad_norm": 3.998334370519608, + "learning_rate": 6.3543954884475225e-06, + "loss": 0.5786, "step": 6064 }, { - "epoch": 0.64, - "grad_norm": 4.660109768200281, - "learning_rate": 3.0576889197695435e-06, - "loss": 0.5617, + "epoch": 0.43, + "grad_norm": 1.7298587965954304, + "learning_rate": 6.35328924919031e-06, + "loss": 0.5821, "step": 6065 }, { - "epoch": 0.64, - "grad_norm": 2.2782490312138446, - "learning_rate": 3.0561186373865625e-06, - "loss": 0.5962, + "epoch": 0.43, + "grad_norm": 1.930386077934964, + "learning_rate": 6.352182938447775e-06, + "loss": 0.5359, "step": 6066 }, { - "epoch": 0.64, - "grad_norm": 2.5339115449409535, - "learning_rate": 3.054548580838099e-06, - "loss": 0.6213, + "epoch": 0.43, + "grad_norm": 1.7061029464254989, + "learning_rate": 6.351076556278354e-06, + "loss": 0.5433, "step": 6067 }, { - "epoch": 0.64, - "grad_norm": 2.145966200390791, - "learning_rate": 3.05297875030656e-06, - "loss": 0.5802, + "epoch": 0.43, + "grad_norm": 1.6486790342231459, + "learning_rate": 6.349970102740492e-06, + "loss": 0.5306, "step": 6068 }, { - "epoch": 0.64, - "grad_norm": 3.023290228663878, - "learning_rate": 3.05140914597432e-06, - "loss": 0.6221, + "epoch": 0.43, + "grad_norm": 1.4883072993822084, + "learning_rate": 6.348863577892634e-06, + "loss": 0.4812, "step": 6069 }, { - "epoch": 0.64, - "grad_norm": 2.7573240624259157, - "learning_rate": 3.049839768023732e-06, - "loss": 0.5638, + "epoch": 0.43, + "grad_norm": 1.672612799616705, + "learning_rate": 6.347756981793231e-06, + "loss": 0.517, "step": 6070 }, { - "epoch": 0.64, - "grad_norm": 10.360362698449237, - "learning_rate": 3.0482706166371236e-06, - "loss": 0.6423, + "epoch": 0.43, + "grad_norm": 1.6713994942557995, + "learning_rate": 6.3466503145007385e-06, + "loss": 0.5221, "step": 6071 }, { - "epoch": 0.64, - "grad_norm": 2.7058889588734343, - "learning_rate": 3.0467016919967908e-06, - "loss": 0.6319, + "epoch": 0.43, + "grad_norm": 1.7280025606133818, + "learning_rate": 6.345543576073614e-06, + "loss": 0.6066, "step": 6072 }, { - "epoch": 0.64, - "grad_norm": 2.38161674800116, - "learning_rate": 3.0451329942850117e-06, - "loss": 0.6777, + "epoch": 0.43, + "grad_norm": 1.8332322985624276, + "learning_rate": 6.344436766570317e-06, + "loss": 0.5454, "step": 6073 }, { - "epoch": 0.64, - "grad_norm": 2.4754236044435, - "learning_rate": 3.0435645236840296e-06, - "loss": 0.6553, + "epoch": 0.43, + "grad_norm": 1.7801530058706834, + "learning_rate": 6.343329886049316e-06, + "loss": 0.5304, "step": 6074 }, { - "epoch": 0.64, - "grad_norm": 2.505835959851159, - "learning_rate": 3.041996280376066e-06, - "loss": 0.5921, + "epoch": 0.43, + "grad_norm": 1.4013317306437543, + "learning_rate": 6.342222934569078e-06, + "loss": 0.5494, "step": 6075 }, { - "epoch": 0.64, - "grad_norm": 2.2325223472969085, - "learning_rate": 3.0404282645433125e-06, - "loss": 0.6708, + "epoch": 0.43, + "grad_norm": 1.5946136250603649, + "learning_rate": 6.341115912188075e-06, + "loss": 0.5688, "step": 6076 }, { - "epoch": 0.64, - "grad_norm": 2.4229637130342065, - "learning_rate": 3.038860476367942e-06, - "loss": 0.5817, + "epoch": 0.43, + "grad_norm": 1.6880700240408795, + "learning_rate": 6.340008818964786e-06, + "loss": 0.4793, "step": 6077 }, { - "epoch": 0.64, - "grad_norm": 2.2424616876949837, - "learning_rate": 3.03729291603209e-06, - "loss": 0.6665, + "epoch": 0.43, + "grad_norm": 1.7427498920946018, + "learning_rate": 6.33890165495769e-06, + "loss": 0.5808, "step": 6078 }, { - "epoch": 0.64, - "grad_norm": 2.196951713754301, - "learning_rate": 3.0357255837178733e-06, - "loss": 0.6736, + "epoch": 0.43, + "grad_norm": 1.4727616842982063, + "learning_rate": 6.337794420225274e-06, + "loss": 0.5446, "step": 6079 }, { - "epoch": 0.64, - "grad_norm": 3.412747433968496, - "learning_rate": 3.034158479607381e-06, - "loss": 0.6189, + "epoch": 0.43, + "grad_norm": 0.721164221831636, + "learning_rate": 6.336687114826021e-06, + "loss": 0.4388, "step": 6080 }, { - "epoch": 0.64, - "grad_norm": 2.3144405798445638, - "learning_rate": 3.032591603882674e-06, - "loss": 0.6844, + "epoch": 0.43, + "grad_norm": 2.8272780387034, + "learning_rate": 6.335579738818428e-06, + "loss": 0.4593, "step": 6081 }, { - "epoch": 0.64, - "grad_norm": 2.7027103937151957, - "learning_rate": 3.031024956725787e-06, - "loss": 0.6343, + "epoch": 0.43, + "grad_norm": 1.9699786007736828, + "learning_rate": 6.334472292260986e-06, + "loss": 0.6211, "step": 6082 }, { - "epoch": 0.64, - "grad_norm": 2.612524722737757, - "learning_rate": 3.029458538318728e-06, - "loss": 0.6779, + "epoch": 0.43, + "grad_norm": 1.9846413541329506, + "learning_rate": 6.333364775212195e-06, + "loss": 0.559, "step": 6083 }, { - "epoch": 0.64, - "grad_norm": 2.5457464342733767, - "learning_rate": 3.02789234884348e-06, - "loss": 0.6377, + "epoch": 0.43, + "grad_norm": 1.6428691969143958, + "learning_rate": 6.332257187730559e-06, + "loss": 0.5158, "step": 6084 }, { - "epoch": 0.64, - "grad_norm": 1.9720571167689611, - "learning_rate": 3.0263263884819975e-06, - "loss": 0.6417, + "epoch": 0.43, + "grad_norm": 1.5475940338363856, + "learning_rate": 6.331149529874585e-06, + "loss": 0.5177, "step": 6085 }, { - "epoch": 0.64, - "grad_norm": 2.847776105778556, - "learning_rate": 3.0247606574162127e-06, - "loss": 0.6391, + "epoch": 0.43, + "grad_norm": 7.802418063036298, + "learning_rate": 6.330041801702782e-06, + "loss": 0.4956, "step": 6086 }, { - "epoch": 0.64, - "grad_norm": 2.33929101232662, - "learning_rate": 3.0231951558280226e-06, - "loss": 0.6156, + "epoch": 0.43, + "grad_norm": 1.474375336430168, + "learning_rate": 6.328934003273665e-06, + "loss": 0.4963, "step": 6087 }, { - "epoch": 0.64, - "grad_norm": 2.5013360523106787, - "learning_rate": 3.0216298838993043e-06, - "loss": 0.7201, + "epoch": 0.43, + "grad_norm": 1.8115152788835485, + "learning_rate": 6.327826134645749e-06, + "loss": 0.4461, "step": 6088 }, { - "epoch": 0.64, - "grad_norm": 3.089321590479491, - "learning_rate": 3.020064841811908e-06, - "loss": 0.6186, + "epoch": 0.43, + "grad_norm": 1.8019911306165188, + "learning_rate": 6.326718195877559e-06, + "loss": 0.5545, "step": 6089 }, { - "epoch": 0.64, - "grad_norm": 2.7046583661780823, - "learning_rate": 3.018500029747657e-06, - "loss": 0.6985, + "epoch": 0.43, + "grad_norm": 1.6922096554144357, + "learning_rate": 6.3256101870276165e-06, + "loss": 0.5432, "step": 6090 }, { - "epoch": 0.64, - "grad_norm": 2.5221299383717657, - "learning_rate": 3.016935447888343e-06, - "loss": 0.6104, + "epoch": 0.43, + "grad_norm": 1.730534631411812, + "learning_rate": 6.324502108154454e-06, + "loss": 0.5926, "step": 6091 }, { - "epoch": 0.64, - "grad_norm": 2.1978338732670597, - "learning_rate": 3.015371096415735e-06, - "loss": 0.5294, + "epoch": 0.43, + "grad_norm": 1.6763032117537606, + "learning_rate": 6.323393959316601e-06, + "loss": 0.5834, "step": 6092 }, { - "epoch": 0.64, - "grad_norm": 2.685524749270053, - "learning_rate": 3.0138069755115772e-06, - "loss": 0.672, + "epoch": 0.43, + "grad_norm": 1.6212395680993688, + "learning_rate": 6.322285740572594e-06, + "loss": 0.5665, "step": 6093 }, { - "epoch": 0.64, - "grad_norm": 2.1195488615414546, - "learning_rate": 3.012243085357582e-06, - "loss": 0.6829, + "epoch": 0.43, + "grad_norm": 1.4977825978565384, + "learning_rate": 6.321177451980975e-06, + "loss": 0.5561, "step": 6094 }, { - "epoch": 0.64, - "grad_norm": 2.577287209734468, - "learning_rate": 3.010679426135442e-06, - "loss": 0.7072, + "epoch": 0.43, + "grad_norm": 1.5585795532454014, + "learning_rate": 6.320069093600287e-06, + "loss": 0.5748, "step": 6095 }, { - "epoch": 0.64, - "grad_norm": 6.13324035955566, - "learning_rate": 3.009115998026815e-06, - "loss": 0.5589, + "epoch": 0.43, + "grad_norm": 1.759789714053568, + "learning_rate": 6.318960665489076e-06, + "loss": 0.5802, "step": 6096 }, { - "epoch": 0.64, - "grad_norm": 2.274593312395244, - "learning_rate": 3.007552801213335e-06, - "loss": 0.6797, + "epoch": 0.43, + "grad_norm": 1.6970687185602316, + "learning_rate": 6.317852167705893e-06, + "loss": 0.503, "step": 6097 }, { - "epoch": 0.64, - "grad_norm": 0.8495761189029405, - "learning_rate": 3.0059898358766102e-06, - "loss": 0.5548, + "epoch": 0.43, + "grad_norm": 1.7662538973367834, + "learning_rate": 6.316743600309292e-06, + "loss": 0.5124, "step": 6098 }, { - "epoch": 0.64, - "grad_norm": 2.263736591389369, - "learning_rate": 3.004427102198225e-06, - "loss": 0.6734, + "epoch": 0.43, + "grad_norm": 2.0631605394959913, + "learning_rate": 6.3156349633578325e-06, + "loss": 0.5578, "step": 6099 }, { - "epoch": 0.64, - "grad_norm": 2.428586454310511, - "learning_rate": 3.002864600359729e-06, - "loss": 0.6665, + "epoch": 0.43, + "grad_norm": 1.799792036265137, + "learning_rate": 6.314526256910077e-06, + "loss": 0.5372, "step": 6100 }, { - "epoch": 0.64, - "grad_norm": 2.6142648776527855, - "learning_rate": 3.0013023305426493e-06, - "loss": 0.665, + "epoch": 0.43, + "grad_norm": 1.5262260514380073, + "learning_rate": 6.313417481024591e-06, + "loss": 0.4798, "step": 6101 }, { - "epoch": 0.64, - "grad_norm": 4.093099723594316, - "learning_rate": 2.9997402929284886e-06, - "loss": 0.5937, + "epoch": 0.43, + "grad_norm": 1.6504546546202055, + "learning_rate": 6.3123086357599425e-06, + "loss": 0.4858, "step": 6102 }, { - "epoch": 0.64, - "grad_norm": 0.981769329689394, - "learning_rate": 2.9981784876987195e-06, - "loss": 0.5496, + "epoch": 0.43, + "grad_norm": 0.788843214084213, + "learning_rate": 6.311199721174706e-06, + "loss": 0.4554, "step": 6103 }, { - "epoch": 0.64, - "grad_norm": 2.089324365056355, - "learning_rate": 2.996616915034786e-06, - "loss": 0.582, + "epoch": 0.43, + "grad_norm": 1.9803111198019188, + "learning_rate": 6.310090737327454e-06, + "loss": 0.5302, "step": 6104 }, { - "epoch": 0.64, - "grad_norm": 3.283444978516847, - "learning_rate": 2.9950555751181067e-06, - "loss": 0.6011, + "epoch": 0.43, + "grad_norm": 1.5980135391963333, + "learning_rate": 6.308981684276774e-06, + "loss": 0.5663, "step": 6105 }, { - "epoch": 0.64, - "grad_norm": 2.1322926764233148, - "learning_rate": 2.9934944681300764e-06, - "loss": 0.595, + "epoch": 0.43, + "grad_norm": 2.05909675895273, + "learning_rate": 6.307872562081244e-06, + "loss": 0.5117, "step": 6106 }, { - "epoch": 0.64, - "grad_norm": 2.6785042803171994, - "learning_rate": 2.9919335942520577e-06, - "loss": 0.604, + "epoch": 0.43, + "grad_norm": 3.9094287150764266, + "learning_rate": 6.306763370799454e-06, + "loss": 0.5302, "step": 6107 }, { - "epoch": 0.64, - "grad_norm": 2.525026434174256, - "learning_rate": 2.9903729536653908e-06, - "loss": 0.5419, + "epoch": 0.43, + "grad_norm": 0.8014578744429597, + "learning_rate": 6.305654110489996e-06, + "loss": 0.4535, "step": 6108 }, { - "epoch": 0.64, - "grad_norm": 4.332421592393996, - "learning_rate": 2.9888125465513838e-06, - "loss": 0.6079, + "epoch": 0.43, + "grad_norm": 0.7821462655626772, + "learning_rate": 6.304544781211461e-06, + "loss": 0.4428, "step": 6109 }, { - "epoch": 0.64, - "grad_norm": 2.7390363662038246, - "learning_rate": 2.987252373091322e-06, - "loss": 0.5825, + "epoch": 0.43, + "grad_norm": 1.5627923418959861, + "learning_rate": 6.303435383022453e-06, + "loss": 0.5245, "step": 6110 }, { - "epoch": 0.64, - "grad_norm": 2.6929057930774722, - "learning_rate": 2.9856924334664607e-06, - "loss": 0.5882, + "epoch": 0.43, + "grad_norm": 1.5778730970804111, + "learning_rate": 6.3023259159815705e-06, + "loss": 0.4845, "step": 6111 }, { - "epoch": 0.64, - "grad_norm": 2.8780256095081334, - "learning_rate": 2.9841327278580306e-06, - "loss": 0.5991, + "epoch": 0.43, + "grad_norm": 1.7514241813910292, + "learning_rate": 6.301216380147419e-06, + "loss": 0.5967, "step": 6112 }, { - "epoch": 0.64, - "grad_norm": 3.211030034668102, - "learning_rate": 2.982573256447232e-06, - "loss": 0.6217, + "epoch": 0.43, + "grad_norm": 1.7931257183949585, + "learning_rate": 6.3001067755786105e-06, + "loss": 0.4948, "step": 6113 }, { - "epoch": 0.64, - "grad_norm": 2.076849408004536, - "learning_rate": 2.98101401941524e-06, - "loss": 0.6127, + "epoch": 0.43, + "grad_norm": 1.8632497864967363, + "learning_rate": 6.298997102333756e-06, + "loss": 0.5127, "step": 6114 }, { - "epoch": 0.64, - "grad_norm": 2.8399499017577328, - "learning_rate": 2.979455016943204e-06, - "loss": 0.6985, + "epoch": 0.43, + "grad_norm": 1.990703323015417, + "learning_rate": 6.297887360471474e-06, + "loss": 0.4982, "step": 6115 }, { - "epoch": 0.64, - "grad_norm": 2.5380298634074285, - "learning_rate": 2.977896249212244e-06, - "loss": 0.6568, + "epoch": 0.43, + "grad_norm": 0.7565502975097378, + "learning_rate": 6.296777550050384e-06, + "loss": 0.4396, "step": 6116 }, { - "epoch": 0.64, - "grad_norm": 2.1139041232082096, - "learning_rate": 2.976337716403452e-06, - "loss": 0.6043, + "epoch": 0.43, + "grad_norm": 1.696714864096735, + "learning_rate": 6.295667671129109e-06, + "loss": 0.5551, "step": 6117 }, { - "epoch": 0.64, - "grad_norm": 2.386270978789105, - "learning_rate": 2.974779418697893e-06, - "loss": 0.58, + "epoch": 0.43, + "grad_norm": 1.6355721572430648, + "learning_rate": 6.294557723766277e-06, + "loss": 0.658, "step": 6118 }, { - "epoch": 0.64, - "grad_norm": 2.249981645922609, - "learning_rate": 2.9732213562766076e-06, - "loss": 0.7059, + "epoch": 0.43, + "grad_norm": 2.0328367666366334, + "learning_rate": 6.293447708020519e-06, + "loss": 0.6119, "step": 6119 }, { - "epoch": 0.64, - "grad_norm": 2.7953475276968995, - "learning_rate": 2.9716635293206054e-06, - "loss": 0.5731, + "epoch": 0.43, + "grad_norm": 1.7772455220383725, + "learning_rate": 6.292337623950471e-06, + "loss": 0.5195, "step": 6120 }, { - "epoch": 0.64, - "grad_norm": 2.1595949678722386, - "learning_rate": 2.9701059380108732e-06, - "loss": 0.6399, + "epoch": 0.43, + "grad_norm": 1.6352842659506563, + "learning_rate": 6.291227471614772e-06, + "loss": 0.6237, "step": 6121 }, { - "epoch": 0.64, - "grad_norm": 4.250031089313065, - "learning_rate": 2.9685485825283646e-06, - "loss": 0.6693, + "epoch": 0.43, + "grad_norm": 1.835530446528254, + "learning_rate": 6.29011725107206e-06, + "loss": 0.5877, "step": 6122 }, { - "epoch": 0.64, - "grad_norm": 2.716682335751081, - "learning_rate": 2.9669914630540074e-06, - "loss": 0.5684, + "epoch": 0.43, + "grad_norm": 1.7602722289561124, + "learning_rate": 6.289006962380986e-06, + "loss": 0.5288, "step": 6123 }, { - "epoch": 0.64, - "grad_norm": 2.11893087796511, - "learning_rate": 2.9654345797687067e-06, - "loss": 0.6003, + "epoch": 0.43, + "grad_norm": 1.5528271240653484, + "learning_rate": 6.287896605600195e-06, + "loss": 0.5207, "step": 6124 }, { - "epoch": 0.64, - "grad_norm": 3.1494558741601413, - "learning_rate": 2.9638779328533363e-06, - "loss": 0.5924, + "epoch": 0.43, + "grad_norm": 1.6632876956213896, + "learning_rate": 6.286786180788341e-06, + "loss": 0.5911, "step": 6125 }, { - "epoch": 0.64, - "grad_norm": 2.6969375293675673, - "learning_rate": 2.9623215224887405e-06, - "loss": 0.6277, + "epoch": 0.43, + "grad_norm": 0.7783529778225994, + "learning_rate": 6.285675688004081e-06, + "loss": 0.4418, "step": 6126 }, { - "epoch": 0.64, - "grad_norm": 5.129836907789918, - "learning_rate": 2.9607653488557385e-06, - "loss": 0.6265, + "epoch": 0.43, + "grad_norm": 1.4813194355114034, + "learning_rate": 6.284565127306075e-06, + "loss": 0.5341, "step": 6127 }, { - "epoch": 0.64, - "grad_norm": 2.24499842881353, - "learning_rate": 2.9592094121351257e-06, - "loss": 0.5141, + "epoch": 0.43, + "grad_norm": 1.669930380544074, + "learning_rate": 6.283454498752985e-06, + "loss": 0.6283, "step": 6128 }, { - "epoch": 0.64, - "grad_norm": 1.1727226381138738, - "learning_rate": 2.9576537125076644e-06, - "loss": 0.5722, + "epoch": 0.43, + "grad_norm": 2.1252952337405, + "learning_rate": 6.28234380240348e-06, + "loss": 0.4762, "step": 6129 }, { - "epoch": 0.65, - "grad_norm": 2.4525235655286766, - "learning_rate": 2.956098250154089e-06, - "loss": 0.628, + "epoch": 0.43, + "grad_norm": 1.7698785457888835, + "learning_rate": 6.281233038316231e-06, + "loss": 0.5654, "step": 6130 }, { - "epoch": 0.65, - "grad_norm": 3.0032073192917212, - "learning_rate": 2.954543025255111e-06, - "loss": 0.6489, + "epoch": 0.44, + "grad_norm": 1.6502491720614618, + "learning_rate": 6.280122206549911e-06, + "loss": 0.6142, "step": 6131 }, { - "epoch": 0.65, - "grad_norm": 2.2967514557097366, - "learning_rate": 2.9529880379914123e-06, - "loss": 0.6079, + "epoch": 0.44, + "grad_norm": 1.469238816355353, + "learning_rate": 6.2790113071631995e-06, + "loss": 0.4954, "step": 6132 }, { - "epoch": 0.65, - "grad_norm": 3.9813396803758367, - "learning_rate": 2.9514332885436447e-06, - "loss": 0.6043, + "epoch": 0.44, + "grad_norm": 1.6244157797136503, + "learning_rate": 6.277900340214775e-06, + "loss": 0.5854, "step": 6133 }, { - "epoch": 0.65, - "grad_norm": 2.374827795762819, - "learning_rate": 2.9498787770924375e-06, - "loss": 0.6733, + "epoch": 0.44, + "grad_norm": 2.3954772480888566, + "learning_rate": 6.276789305763325e-06, + "loss": 0.4888, "step": 6134 }, { - "epoch": 0.65, - "grad_norm": 2.8305018905675694, - "learning_rate": 2.9483245038183874e-06, - "loss": 0.6773, + "epoch": 0.44, + "grad_norm": 1.8572885649158046, + "learning_rate": 6.275678203867539e-06, + "loss": 0.5396, "step": 6135 }, { - "epoch": 0.65, - "grad_norm": 2.729421853905016, - "learning_rate": 2.946770468902064e-06, - "loss": 0.6898, + "epoch": 0.44, + "grad_norm": 1.9666503447153676, + "learning_rate": 6.274567034586107e-06, + "loss": 0.4823, "step": 6136 }, { - "epoch": 0.65, - "grad_norm": 0.9984646956788515, - "learning_rate": 2.945216672524014e-06, - "loss": 0.6088, + "epoch": 0.44, + "grad_norm": 1.8319228051918792, + "learning_rate": 6.273455797977724e-06, + "loss": 0.5444, "step": 6137 }, { - "epoch": 0.65, - "grad_norm": 2.667763450610834, - "learning_rate": 2.943663114864752e-06, - "loss": 0.577, + "epoch": 0.44, + "grad_norm": 1.8598215106329359, + "learning_rate": 6.2723444941010924e-06, + "loss": 0.4726, "step": 6138 }, { - "epoch": 0.65, - "grad_norm": 2.802310695654871, - "learning_rate": 2.9421097961047633e-06, - "loss": 0.5924, + "epoch": 0.44, + "grad_norm": 1.5259607105043078, + "learning_rate": 6.271233123014912e-06, + "loss": 0.5149, "step": 6139 }, { - "epoch": 0.65, - "grad_norm": 2.2866904253496174, - "learning_rate": 2.9405567164245096e-06, - "loss": 0.6054, + "epoch": 0.44, + "grad_norm": 0.7648063697617749, + "learning_rate": 6.270121684777892e-06, + "loss": 0.4062, "step": 6140 }, { - "epoch": 0.65, - "grad_norm": 3.2953941016991246, - "learning_rate": 2.939003876004424e-06, - "loss": 0.6667, + "epoch": 0.44, + "grad_norm": 2.751524512569989, + "learning_rate": 6.2690101794487405e-06, + "loss": 0.6027, "step": 6141 }, { - "epoch": 0.65, - "grad_norm": 4.230139296054894, - "learning_rate": 2.9374512750249098e-06, - "loss": 0.594, + "epoch": 0.44, + "grad_norm": 1.675003716083765, + "learning_rate": 6.267898607086169e-06, + "loss": 0.566, "step": 6142 }, { - "epoch": 0.65, - "grad_norm": 2.2675620169968975, - "learning_rate": 2.935898913666345e-06, - "loss": 0.6655, + "epoch": 0.44, + "grad_norm": 1.5016987172815628, + "learning_rate": 6.266786967748899e-06, + "loss": 0.551, "step": 6143 }, { - "epoch": 0.65, - "grad_norm": 2.459430858178251, - "learning_rate": 2.9343467921090774e-06, - "loss": 0.655, + "epoch": 0.44, + "grad_norm": 1.5423562554786494, + "learning_rate": 6.265675261495648e-06, + "loss": 0.4909, "step": 6144 }, { - "epoch": 0.65, - "grad_norm": 2.267669278148576, - "learning_rate": 2.9327949105334284e-06, - "loss": 0.633, + "epoch": 0.44, + "grad_norm": 1.9171359140426696, + "learning_rate": 6.264563488385142e-06, + "loss": 0.5061, "step": 6145 }, { - "epoch": 0.65, - "grad_norm": 2.4328862273124567, - "learning_rate": 2.93124326911969e-06, - "loss": 0.6138, + "epoch": 0.44, + "grad_norm": 4.678936170508005, + "learning_rate": 6.263451648476107e-06, + "loss": 0.5251, "step": 6146 }, { - "epoch": 0.65, - "grad_norm": 0.8997361338805473, - "learning_rate": 2.9296918680481308e-06, - "loss": 0.5905, + "epoch": 0.44, + "grad_norm": 1.5388787864978313, + "learning_rate": 6.262339741827276e-06, + "loss": 0.5849, "step": 6147 }, { - "epoch": 0.65, - "grad_norm": 2.7491447988759568, - "learning_rate": 2.928140707498984e-06, - "loss": 0.6503, + "epoch": 0.44, + "grad_norm": 2.384554247552369, + "learning_rate": 6.261227768497381e-06, + "loss": 0.6163, "step": 6148 }, { - "epoch": 0.65, - "grad_norm": 3.6968575644202506, - "learning_rate": 2.92658978765246e-06, - "loss": 0.5916, + "epoch": 0.44, + "grad_norm": 2.224197377473454, + "learning_rate": 6.260115728545162e-06, + "loss": 0.5727, "step": 6149 }, { - "epoch": 0.65, - "grad_norm": 3.5697760256627373, - "learning_rate": 2.925039108688742e-06, - "loss": 0.638, + "epoch": 0.44, + "grad_norm": 2.0989297124904986, + "learning_rate": 6.25900362202936e-06, + "loss": 0.5506, "step": 6150 }, { - "epoch": 0.65, - "grad_norm": 2.049482671759394, - "learning_rate": 2.9234886707879827e-06, - "loss": 0.6333, + "epoch": 0.44, + "grad_norm": 0.7624101202259755, + "learning_rate": 6.2578914490087215e-06, + "loss": 0.4365, "step": 6151 }, { - "epoch": 0.65, - "grad_norm": 2.446291237259445, - "learning_rate": 2.921938474130307e-06, - "loss": 0.652, + "epoch": 0.44, + "grad_norm": 1.782475018928272, + "learning_rate": 6.256779209541993e-06, + "loss": 0.5393, "step": 6152 }, { - "epoch": 0.65, - "grad_norm": 2.200019946516808, - "learning_rate": 2.9203885188958103e-06, - "loss": 0.5058, + "epoch": 0.44, + "grad_norm": 1.9247115404036428, + "learning_rate": 6.2556669036879305e-06, + "loss": 0.5967, "step": 6153 }, { - "epoch": 0.65, - "grad_norm": 3.0764287200010685, - "learning_rate": 2.9188388052645656e-06, - "loss": 0.6562, + "epoch": 0.44, + "grad_norm": 1.7011673386359527, + "learning_rate": 6.254554531505286e-06, + "loss": 0.5527, "step": 6154 }, { - "epoch": 0.65, - "grad_norm": 3.3263889642902593, - "learning_rate": 2.9172893334166108e-06, - "loss": 0.5888, + "epoch": 0.44, + "grad_norm": 0.7385748487037749, + "learning_rate": 6.2534420930528195e-06, + "loss": 0.4286, "step": 6155 }, { - "epoch": 0.65, - "grad_norm": 2.164471451455957, - "learning_rate": 2.915740103531963e-06, - "loss": 0.5748, + "epoch": 0.44, + "grad_norm": 1.5161974139428882, + "learning_rate": 6.252329588389294e-06, + "loss": 0.5272, "step": 6156 }, { - "epoch": 0.65, - "grad_norm": 3.0166454764249417, - "learning_rate": 2.9141911157906032e-06, - "loss": 0.6495, + "epoch": 0.44, + "grad_norm": 2.889235873565762, + "learning_rate": 6.2512170175734775e-06, + "loss": 0.5592, "step": 6157 }, { - "epoch": 0.65, - "grad_norm": 2.0796493091839743, - "learning_rate": 2.9126423703724925e-06, - "loss": 0.5941, + "epoch": 0.44, + "grad_norm": 1.8448791132858846, + "learning_rate": 6.250104380664137e-06, + "loss": 0.5469, "step": 6158 }, { - "epoch": 0.65, - "grad_norm": 2.472853403516785, - "learning_rate": 2.911093867457555e-06, - "loss": 0.6391, + "epoch": 0.44, + "grad_norm": 1.8051013063694148, + "learning_rate": 6.248991677720049e-06, + "loss": 0.5842, "step": 6159 }, { - "epoch": 0.65, - "grad_norm": 2.2590642055205663, - "learning_rate": 2.9095456072256955e-06, - "loss": 0.6039, + "epoch": 0.44, + "grad_norm": 1.5502540161998797, + "learning_rate": 6.247878908799988e-06, + "loss": 0.506, "step": 6160 }, { - "epoch": 0.65, - "grad_norm": 2.1770319394327617, - "learning_rate": 2.9079975898567823e-06, - "loss": 0.6178, + "epoch": 0.44, + "grad_norm": 2.0813819952494255, + "learning_rate": 6.2467660739627335e-06, + "loss": 0.4698, "step": 6161 }, { - "epoch": 0.65, - "grad_norm": 2.4623318761495, - "learning_rate": 2.906449815530664e-06, - "loss": 0.6449, + "epoch": 0.44, + "grad_norm": 1.5145498468738243, + "learning_rate": 6.2456531732670725e-06, + "loss": 0.4923, "step": 6162 }, { - "epoch": 0.65, - "grad_norm": 2.4317346877522916, - "learning_rate": 2.9049022844271517e-06, - "loss": 0.688, + "epoch": 0.44, + "grad_norm": 2.7066680956440012, + "learning_rate": 6.244540206771787e-06, + "loss": 0.538, "step": 6163 }, { - "epoch": 0.65, - "grad_norm": 2.427285093977721, - "learning_rate": 2.9033549967260383e-06, - "loss": 0.6544, + "epoch": 0.44, + "grad_norm": 2.2050823223399405, + "learning_rate": 6.243427174535672e-06, + "loss": 0.5496, "step": 6164 }, { - "epoch": 0.65, - "grad_norm": 2.4834448464193644, - "learning_rate": 2.9018079526070786e-06, - "loss": 0.5716, + "epoch": 0.44, + "grad_norm": 1.8253386741625213, + "learning_rate": 6.24231407661752e-06, + "loss": 0.5756, "step": 6165 }, { - "epoch": 0.65, - "grad_norm": 3.7395209019829614, - "learning_rate": 2.900261152250007e-06, - "loss": 0.542, + "epoch": 0.44, + "grad_norm": 1.9087996375251355, + "learning_rate": 6.241200913076131e-06, + "loss": 0.541, "step": 6166 }, { - "epoch": 0.65, - "grad_norm": 3.0529654367037993, - "learning_rate": 2.8987145958345235e-06, - "loss": 0.5837, + "epoch": 0.44, + "grad_norm": 1.5359523046901937, + "learning_rate": 6.240087683970302e-06, + "loss": 0.5358, "step": 6167 }, { - "epoch": 0.65, - "grad_norm": 3.1062690132576405, - "learning_rate": 2.8971682835403043e-06, - "loss": 0.6021, + "epoch": 0.44, + "grad_norm": 1.9341348886556613, + "learning_rate": 6.2389743893588405e-06, + "loss": 0.5499, "step": 6168 }, { - "epoch": 0.65, - "grad_norm": 2.8007780098866983, - "learning_rate": 2.895622215546997e-06, - "loss": 0.5921, + "epoch": 0.44, + "grad_norm": 1.9847867889561752, + "learning_rate": 6.2378610293005536e-06, + "loss": 0.608, "step": 6169 }, { - "epoch": 0.65, - "grad_norm": 3.200411944525784, - "learning_rate": 2.8940763920342153e-06, - "loss": 0.6037, + "epoch": 0.44, + "grad_norm": 1.8834368961291679, + "learning_rate": 6.236747603854252e-06, + "loss": 0.4832, "step": 6170 }, { - "epoch": 0.65, - "grad_norm": 4.858054420142283, - "learning_rate": 2.892530813181553e-06, - "loss": 0.6641, + "epoch": 0.44, + "grad_norm": 1.589830020588979, + "learning_rate": 6.23563411307875e-06, + "loss": 0.5925, "step": 6171 }, { - "epoch": 0.65, - "grad_norm": 2.5625816954495666, - "learning_rate": 2.8909854791685666e-06, - "loss": 0.5688, + "epoch": 0.44, + "grad_norm": 1.9198303737497495, + "learning_rate": 6.23452055703287e-06, + "loss": 0.5564, "step": 6172 }, { - "epoch": 0.65, - "grad_norm": 2.715416045079819, - "learning_rate": 2.889440390174793e-06, - "loss": 0.5802, + "epoch": 0.44, + "grad_norm": 1.6413817779682078, + "learning_rate": 6.2334069357754305e-06, + "loss": 0.5104, "step": 6173 }, { - "epoch": 0.65, - "grad_norm": 2.6641433870808893, - "learning_rate": 2.887895546379732e-06, - "loss": 0.5821, + "epoch": 0.44, + "grad_norm": 2.1040736103756315, + "learning_rate": 6.232293249365256e-06, + "loss": 0.5297, "step": 6174 }, { - "epoch": 0.65, - "grad_norm": 2.416515122562737, - "learning_rate": 2.8863509479628626e-06, - "loss": 0.6671, + "epoch": 0.44, + "grad_norm": 1.6439080460669913, + "learning_rate": 6.231179497861177e-06, + "loss": 0.4819, "step": 6175 }, { - "epoch": 0.65, - "grad_norm": 2.836952564545351, - "learning_rate": 2.884806595103628e-06, - "loss": 0.6201, + "epoch": 0.44, + "grad_norm": 1.627745819660884, + "learning_rate": 6.230065681322025e-06, + "loss": 0.5874, "step": 6176 }, { - "epoch": 0.65, - "grad_norm": 2.588917907113869, - "learning_rate": 2.8832624879814507e-06, - "loss": 0.5886, + "epoch": 0.44, + "grad_norm": 1.6354643101790303, + "learning_rate": 6.228951799806636e-06, + "loss": 0.5482, "step": 6177 }, { - "epoch": 0.65, - "grad_norm": 6.571817028335935, - "learning_rate": 2.8817186267757173e-06, - "loss": 0.5718, + "epoch": 0.44, + "grad_norm": 1.7619981749887288, + "learning_rate": 6.227837853373849e-06, + "loss": 0.5021, "step": 6178 }, { - "epoch": 0.65, - "grad_norm": 2.812640707900028, - "learning_rate": 2.88017501166579e-06, - "loss": 0.6379, + "epoch": 0.44, + "grad_norm": 1.8309392706265493, + "learning_rate": 6.226723842082505e-06, + "loss": 0.5775, "step": 6179 }, { - "epoch": 0.65, - "grad_norm": 2.626920559291416, - "learning_rate": 2.8786316428310046e-06, - "loss": 0.5918, + "epoch": 0.44, + "grad_norm": 1.6317995520885173, + "learning_rate": 6.225609765991452e-06, + "loss": 0.5944, "step": 6180 }, { - "epoch": 0.65, - "grad_norm": 2.798186376430685, - "learning_rate": 2.8770885204506603e-06, - "loss": 0.6388, + "epoch": 0.44, + "grad_norm": 1.7403009692322589, + "learning_rate": 6.224495625159538e-06, + "loss": 0.4849, "step": 6181 }, { - "epoch": 0.65, - "grad_norm": 2.751589330763615, - "learning_rate": 2.8755456447040362e-06, - "loss": 0.573, + "epoch": 0.44, + "grad_norm": 2.809357781127944, + "learning_rate": 6.223381419645615e-06, + "loss": 0.5581, "step": 6182 }, { - "epoch": 0.65, - "grad_norm": 2.28473507954531, - "learning_rate": 2.874003015770377e-06, - "loss": 0.6162, + "epoch": 0.44, + "grad_norm": 1.6411644107832721, + "learning_rate": 6.2222671495085415e-06, + "loss": 0.6185, "step": 6183 }, { - "epoch": 0.65, - "grad_norm": 2.557119980309637, - "learning_rate": 2.872460633828904e-06, - "loss": 0.5865, + "epoch": 0.44, + "grad_norm": 1.6449911107901565, + "learning_rate": 6.221152814807176e-06, + "loss": 0.49, "step": 6184 }, { - "epoch": 0.65, - "grad_norm": 3.1804128048003784, - "learning_rate": 2.8709184990588012e-06, - "loss": 0.6432, + "epoch": 0.44, + "grad_norm": 1.6068414688377948, + "learning_rate": 6.220038415600379e-06, + "loss": 0.5417, "step": 6185 }, { - "epoch": 0.65, - "grad_norm": 2.6384781468932905, - "learning_rate": 2.869376611639236e-06, - "loss": 0.6571, + "epoch": 0.44, + "grad_norm": 1.9969485878249782, + "learning_rate": 6.218923951947019e-06, + "loss": 0.4713, "step": 6186 }, { - "epoch": 0.65, - "grad_norm": 2.595212904243066, - "learning_rate": 2.8678349717493343e-06, - "loss": 0.6985, + "epoch": 0.44, + "grad_norm": 1.5844837576922057, + "learning_rate": 6.217809423905967e-06, + "loss": 0.458, "step": 6187 }, { - "epoch": 0.65, - "grad_norm": 3.0950307332286857, - "learning_rate": 2.8662935795682046e-06, - "loss": 0.6029, + "epoch": 0.44, + "grad_norm": 0.7709199699288237, + "learning_rate": 6.2166948315360945e-06, + "loss": 0.454, "step": 6188 }, { - "epoch": 0.65, - "grad_norm": 3.345799472219859, - "learning_rate": 2.864752435274916e-06, - "loss": 0.6059, + "epoch": 0.44, + "grad_norm": 3.1604570184006198, + "learning_rate": 6.2155801748962774e-06, + "loss": 0.5671, "step": 6189 }, { - "epoch": 0.65, - "grad_norm": 2.25918445844699, - "learning_rate": 2.8632115390485176e-06, - "loss": 0.676, + "epoch": 0.44, + "grad_norm": 1.8646447671552364, + "learning_rate": 6.2144654540453965e-06, + "loss": 0.5616, "step": 6190 }, { - "epoch": 0.65, - "grad_norm": 2.322802206550409, - "learning_rate": 2.8616708910680278e-06, - "loss": 0.6211, + "epoch": 0.44, + "grad_norm": 1.5365011377098845, + "learning_rate": 6.213350669042335e-06, + "loss": 0.5486, "step": 6191 }, { - "epoch": 0.65, - "grad_norm": 2.523140689849376, - "learning_rate": 2.8601304915124305e-06, - "loss": 0.6485, + "epoch": 0.44, + "grad_norm": 1.5281194323623237, + "learning_rate": 6.212235819945982e-06, + "loss": 0.5355, "step": 6192 }, { - "epoch": 0.65, - "grad_norm": 3.013530763468245, - "learning_rate": 2.85859034056069e-06, - "loss": 0.6619, + "epoch": 0.44, + "grad_norm": 1.536637152413724, + "learning_rate": 6.2111209068152226e-06, + "loss": 0.5132, "step": 6193 }, { - "epoch": 0.65, - "grad_norm": 3.291612700420486, - "learning_rate": 2.8570504383917296e-06, - "loss": 0.5981, + "epoch": 0.44, + "grad_norm": 3.2466302964554465, + "learning_rate": 6.210005929708954e-06, + "loss": 0.5531, "step": 6194 }, { - "epoch": 0.65, - "grad_norm": 2.4140328386952463, - "learning_rate": 2.8555107851844576e-06, - "loss": 0.5461, + "epoch": 0.44, + "grad_norm": 2.4598271805125638, + "learning_rate": 6.208890888686072e-06, + "loss": 0.6179, "step": 6195 }, { - "epoch": 0.65, - "grad_norm": 2.687609928584908, - "learning_rate": 2.8539713811177418e-06, - "loss": 0.6283, + "epoch": 0.44, + "grad_norm": 1.5699757222333501, + "learning_rate": 6.2077757838054775e-06, + "loss": 0.5789, "step": 6196 }, { - "epoch": 0.65, - "grad_norm": 2.3244100962522163, - "learning_rate": 2.8524322263704297e-06, - "loss": 0.5716, + "epoch": 0.44, + "grad_norm": 1.8229522874119948, + "learning_rate": 6.206660615126073e-06, + "loss": 0.5209, "step": 6197 }, { - "epoch": 0.65, - "grad_norm": 2.1973658455412997, - "learning_rate": 2.8508933211213306e-06, - "loss": 0.531, + "epoch": 0.44, + "grad_norm": 1.7041064107277306, + "learning_rate": 6.205545382706768e-06, + "loss": 0.5641, "step": 6198 }, { - "epoch": 0.65, - "grad_norm": 2.0935858472905258, - "learning_rate": 2.8493546655492356e-06, - "loss": 0.598, + "epoch": 0.44, + "grad_norm": 1.6674827284717544, + "learning_rate": 6.204430086606469e-06, + "loss": 0.5016, "step": 6199 }, { - "epoch": 0.65, - "grad_norm": 2.77092033346428, - "learning_rate": 2.8478162598328963e-06, - "loss": 0.552, + "epoch": 0.44, + "grad_norm": 2.117914363686433, + "learning_rate": 6.203314726884091e-06, + "loss": 0.5517, "step": 6200 }, { - "epoch": 0.65, - "grad_norm": 2.624275390466366, - "learning_rate": 2.8462781041510446e-06, - "loss": 0.666, + "epoch": 0.44, + "grad_norm": 1.5255239957725937, + "learning_rate": 6.202199303598553e-06, + "loss": 0.6193, "step": 6201 }, { - "epoch": 0.65, - "grad_norm": 2.1540146591782405, - "learning_rate": 2.8447401986823752e-06, - "loss": 0.6687, + "epoch": 0.44, + "grad_norm": 1.623757319562821, + "learning_rate": 6.201083816808774e-06, + "loss": 0.5583, "step": 6202 }, { - "epoch": 0.65, - "grad_norm": 2.263646788490095, - "learning_rate": 2.8432025436055593e-06, - "loss": 0.6402, + "epoch": 0.44, + "grad_norm": 1.687568547448088, + "learning_rate": 6.199968266573678e-06, + "loss": 0.5034, "step": 6203 }, { - "epoch": 0.65, - "grad_norm": 2.537185698965246, - "learning_rate": 2.841665139099239e-06, - "loss": 0.6249, + "epoch": 0.44, + "grad_norm": 2.142736276580768, + "learning_rate": 6.198852652952193e-06, + "loss": 0.5507, "step": 6204 }, { - "epoch": 0.65, - "grad_norm": 2.4385362216870554, - "learning_rate": 2.8401279853420216e-06, - "loss": 0.6824, + "epoch": 0.44, + "grad_norm": 0.7336887062723735, + "learning_rate": 6.197736976003247e-06, + "loss": 0.4499, "step": 6205 }, { - "epoch": 0.65, - "grad_norm": 1.1466991106289137, - "learning_rate": 2.838591082512494e-06, - "loss": 0.5709, + "epoch": 0.44, + "grad_norm": 1.4382025396164515, + "learning_rate": 6.196621235785776e-06, + "loss": 0.4628, "step": 6206 }, { - "epoch": 0.65, - "grad_norm": 2.076550784919779, - "learning_rate": 2.837054430789204e-06, - "loss": 0.6901, + "epoch": 0.44, + "grad_norm": 1.5621131880838122, + "learning_rate": 6.195505432358717e-06, + "loss": 0.53, "step": 6207 }, { - "epoch": 0.65, - "grad_norm": 2.7605170118341413, - "learning_rate": 2.8355180303506803e-06, - "loss": 0.6498, + "epoch": 0.44, + "grad_norm": 1.5569820390980393, + "learning_rate": 6.194389565781008e-06, + "loss": 0.4909, "step": 6208 }, { - "epoch": 0.65, - "grad_norm": 2.224223567454739, - "learning_rate": 2.8339818813754115e-06, - "loss": 0.6497, + "epoch": 0.44, + "grad_norm": 1.4800681596521752, + "learning_rate": 6.1932736361115984e-06, + "loss": 0.4824, "step": 6209 }, { - "epoch": 0.65, - "grad_norm": 2.348664612330819, - "learning_rate": 2.8324459840418694e-06, - "loss": 0.6267, + "epoch": 0.44, + "grad_norm": 1.6841342147700906, + "learning_rate": 6.19215764340943e-06, + "loss": 0.5703, "step": 6210 }, { - "epoch": 0.65, - "grad_norm": 2.9347639572736144, - "learning_rate": 2.8309103385284853e-06, - "loss": 0.6423, + "epoch": 0.44, + "grad_norm": 0.834240733707024, + "learning_rate": 6.191041587733456e-06, + "loss": 0.4237, "step": 6211 }, { - "epoch": 0.65, - "grad_norm": 2.408795609146555, - "learning_rate": 2.82937494501367e-06, - "loss": 0.6295, + "epoch": 0.44, + "grad_norm": 1.7158978162060057, + "learning_rate": 6.189925469142629e-06, + "loss": 0.5383, "step": 6212 }, { - "epoch": 0.65, - "grad_norm": 3.8368693625535277, - "learning_rate": 2.8278398036757963e-06, - "loss": 0.5594, + "epoch": 0.44, + "grad_norm": 1.9712461494873257, + "learning_rate": 6.188809287695907e-06, + "loss": 0.5065, "step": 6213 }, { - "epoch": 0.65, - "grad_norm": 2.51892710402293, - "learning_rate": 2.8263049146932153e-06, - "loss": 0.5737, + "epoch": 0.44, + "grad_norm": 1.5990474779773305, + "learning_rate": 6.187693043452251e-06, + "loss": 0.571, "step": 6214 }, { - "epoch": 0.65, - "grad_norm": 3.302046825233197, - "learning_rate": 2.8247702782442483e-06, - "loss": 0.6769, + "epoch": 0.44, + "grad_norm": 0.7815430489918583, + "learning_rate": 6.186576736470622e-06, + "loss": 0.4775, "step": 6215 }, { - "epoch": 0.65, - "grad_norm": 3.85067453672738, - "learning_rate": 2.8232358945071804e-06, - "loss": 0.5899, + "epoch": 0.44, + "grad_norm": 1.8629920043976602, + "learning_rate": 6.1854603668099896e-06, + "loss": 0.5209, "step": 6216 }, { - "epoch": 0.65, - "grad_norm": 2.9563736592529963, - "learning_rate": 2.821701763660275e-06, - "loss": 0.6199, + "epoch": 0.44, + "grad_norm": 1.8963622335438568, + "learning_rate": 6.184343934529326e-06, + "loss": 0.5297, "step": 6217 }, { - "epoch": 0.65, - "grad_norm": 2.8979485285850846, - "learning_rate": 2.8201678858817604e-06, - "loss": 0.5747, + "epoch": 0.44, + "grad_norm": 1.61221934032901, + "learning_rate": 6.1832274396876e-06, + "loss": 0.5715, "step": 6218 }, { - "epoch": 0.65, - "grad_norm": 6.242630295137544, - "learning_rate": 2.818634261349842e-06, - "loss": 0.5828, + "epoch": 0.44, + "grad_norm": 1.593595627784326, + "learning_rate": 6.182110882343794e-06, + "loss": 0.5191, "step": 6219 }, { - "epoch": 0.65, - "grad_norm": 2.8275662714905065, - "learning_rate": 2.8171008902426865e-06, - "loss": 0.702, + "epoch": 0.44, + "grad_norm": 1.4927035826447947, + "learning_rate": 6.180994262556883e-06, + "loss": 0.5573, "step": 6220 }, { - "epoch": 0.65, - "grad_norm": 2.2586239782876385, - "learning_rate": 2.8155677727384422e-06, - "loss": 0.5648, + "epoch": 0.44, + "grad_norm": 1.6555281521915473, + "learning_rate": 6.179877580385854e-06, + "loss": 0.5647, "step": 6221 }, { - "epoch": 0.65, - "grad_norm": 3.5846393384463773, - "learning_rate": 2.814034909015217e-06, - "loss": 0.6001, + "epoch": 0.44, + "grad_norm": 2.0348484358409746, + "learning_rate": 6.178760835889693e-06, + "loss": 0.5758, "step": 6222 }, { - "epoch": 0.65, - "grad_norm": 2.736699486370083, - "learning_rate": 2.8125022992510997e-06, - "loss": 0.5504, + "epoch": 0.44, + "grad_norm": 1.6724643936881138, + "learning_rate": 6.177644029127391e-06, + "loss": 0.5891, "step": 6223 }, { - "epoch": 0.65, - "grad_norm": 2.982453703063961, - "learning_rate": 2.810969943624139e-06, - "loss": 0.6271, + "epoch": 0.44, + "grad_norm": 5.829288752461, + "learning_rate": 6.1765271601579415e-06, + "loss": 0.5534, "step": 6224 }, { - "epoch": 0.66, - "grad_norm": 1.0378158485658642, - "learning_rate": 2.8094378423123646e-06, - "loss": 0.5252, + "epoch": 0.44, + "grad_norm": 1.4815107342645737, + "learning_rate": 6.17541022904034e-06, + "loss": 0.4882, "step": 6225 }, { - "epoch": 0.66, - "grad_norm": 2.368853983880845, - "learning_rate": 2.807905995493768e-06, - "loss": 0.5657, + "epoch": 0.44, + "grad_norm": 2.1661590045651895, + "learning_rate": 6.1742932358335885e-06, + "loss": 0.5423, "step": 6226 }, { - "epoch": 0.66, - "grad_norm": 2.6453989305193906, - "learning_rate": 2.8063744033463157e-06, - "loss": 0.6209, + "epoch": 0.44, + "grad_norm": 1.4789498097241023, + "learning_rate": 6.173176180596687e-06, + "loss": 0.5725, "step": 6227 }, { - "epoch": 0.66, - "grad_norm": 3.5318565272857363, - "learning_rate": 2.8048430660479463e-06, - "loss": 0.5866, + "epoch": 0.44, + "grad_norm": 1.8176100947467226, + "learning_rate": 6.172059063388647e-06, + "loss": 0.5642, "step": 6228 }, { - "epoch": 0.66, - "grad_norm": 3.8249719217174523, - "learning_rate": 2.803311983776562e-06, - "loss": 0.6002, + "epoch": 0.44, + "grad_norm": 1.7426328800125375, + "learning_rate": 6.170941884268474e-06, + "loss": 0.5834, "step": 6229 }, { - "epoch": 0.66, - "grad_norm": 2.4969861662636537, - "learning_rate": 2.8017811567100434e-06, - "loss": 0.6462, + "epoch": 0.44, + "grad_norm": 2.1388167962460383, + "learning_rate": 6.169824643295184e-06, + "loss": 0.5012, "step": 6230 }, { - "epoch": 0.66, - "grad_norm": 2.3785984897135353, - "learning_rate": 2.8002505850262334e-06, - "loss": 0.5927, + "epoch": 0.44, + "grad_norm": 17.986604005793666, + "learning_rate": 6.168707340527791e-06, + "loss": 0.5548, "step": 6231 }, { - "epoch": 0.66, - "grad_norm": 2.141017713761464, - "learning_rate": 2.7987202689029535e-06, - "loss": 0.6416, + "epoch": 0.44, + "grad_norm": 1.4494248325167172, + "learning_rate": 6.167589976025318e-06, + "loss": 0.4875, "step": 6232 }, { - "epoch": 0.66, - "grad_norm": 2.9090874878260973, - "learning_rate": 2.797190208517988e-06, - "loss": 0.6156, + "epoch": 0.44, + "grad_norm": 1.8625723393003484, + "learning_rate": 6.166472549846785e-06, + "loss": 0.5776, "step": 6233 }, { - "epoch": 0.66, - "grad_norm": 2.0212371235951685, - "learning_rate": 2.795660404049098e-06, - "loss": 0.6247, + "epoch": 0.44, + "grad_norm": 1.5983690048936794, + "learning_rate": 6.16535506205122e-06, + "loss": 0.5794, "step": 6234 }, { - "epoch": 0.66, - "grad_norm": 2.2096556030865004, - "learning_rate": 2.794130855674009e-06, - "loss": 0.6012, + "epoch": 0.44, + "grad_norm": 2.0109205787572058, + "learning_rate": 6.164237512697652e-06, + "loss": 0.5494, "step": 6235 }, { - "epoch": 0.66, - "grad_norm": 2.2361232833661924, - "learning_rate": 2.7926015635704216e-06, - "loss": 0.6237, + "epoch": 0.44, + "grad_norm": 0.7836244380548796, + "learning_rate": 6.1631199018451115e-06, + "loss": 0.422, "step": 6236 }, { - "epoch": 0.66, - "grad_norm": 2.605730880983019, - "learning_rate": 2.7910725279160016e-06, - "loss": 0.6243, + "epoch": 0.44, + "grad_norm": 1.6712249127803358, + "learning_rate": 6.162002229552638e-06, + "loss": 0.5797, "step": 6237 }, { - "epoch": 0.66, - "grad_norm": 2.33390242289277, - "learning_rate": 2.78954374888839e-06, - "loss": 0.6674, + "epoch": 0.44, + "grad_norm": 2.8955147216207138, + "learning_rate": 6.160884495879269e-06, + "loss": 0.5483, "step": 6238 }, { - "epoch": 0.66, - "grad_norm": 3.329689514740396, - "learning_rate": 2.7880152266651985e-06, - "loss": 0.6576, + "epoch": 0.44, + "grad_norm": 1.3617425228454885, + "learning_rate": 6.159766700884049e-06, + "loss": 0.6039, "step": 6239 }, { - "epoch": 0.66, - "grad_norm": 4.400052397965454, - "learning_rate": 2.7864869614240013e-06, - "loss": 0.5916, + "epoch": 0.44, + "grad_norm": 1.59313777763064, + "learning_rate": 6.15864884462602e-06, + "loss": 0.5329, "step": 6240 }, { - "epoch": 0.66, - "grad_norm": 2.437094841587654, - "learning_rate": 2.7849589533423526e-06, - "loss": 0.6769, + "epoch": 0.44, + "grad_norm": 1.6879669534920296, + "learning_rate": 6.157530927164235e-06, + "loss": 0.5229, "step": 6241 }, { - "epoch": 0.66, - "grad_norm": 2.158441047716093, - "learning_rate": 2.783431202597767e-06, - "loss": 0.5817, + "epoch": 0.44, + "grad_norm": 1.8037394547560757, + "learning_rate": 6.156412948557743e-06, + "loss": 0.5162, "step": 6242 }, { - "epoch": 0.66, - "grad_norm": 3.202778092080676, - "learning_rate": 2.78190370936774e-06, - "loss": 0.6905, + "epoch": 0.44, + "grad_norm": 1.6176573591341008, + "learning_rate": 6.1552949088656015e-06, + "loss": 0.5572, "step": 6243 }, { - "epoch": 0.66, - "grad_norm": 2.181636718933403, - "learning_rate": 2.7803764738297257e-06, - "loss": 0.6753, + "epoch": 0.44, + "grad_norm": 1.8427606601954893, + "learning_rate": 6.154176808146867e-06, + "loss": 0.5365, "step": 6244 }, { - "epoch": 0.66, - "grad_norm": 2.169908589296973, - "learning_rate": 2.7788494961611577e-06, - "loss": 0.6062, + "epoch": 0.44, + "grad_norm": 1.697066633073979, + "learning_rate": 6.153058646460603e-06, + "loss": 0.5583, "step": 6245 }, { - "epoch": 0.66, - "grad_norm": 2.1889402639310336, - "learning_rate": 2.7773227765394335e-06, - "loss": 0.6787, + "epoch": 0.44, + "grad_norm": 2.4994415746224634, + "learning_rate": 6.151940423865873e-06, + "loss": 0.6351, "step": 6246 }, { - "epoch": 0.66, - "grad_norm": 2.6886102911065244, - "learning_rate": 2.7757963151419255e-06, - "loss": 0.6211, + "epoch": 0.44, + "grad_norm": 1.8157814585701075, + "learning_rate": 6.150822140421749e-06, + "loss": 0.5145, "step": 6247 }, { - "epoch": 0.66, - "grad_norm": 3.620084649426985, - "learning_rate": 2.7742701121459703e-06, - "loss": 0.5493, + "epoch": 0.44, + "grad_norm": 1.6593396021279347, + "learning_rate": 6.149703796187297e-06, + "loss": 0.5795, "step": 6248 }, { - "epoch": 0.66, - "grad_norm": 2.0808691207938845, - "learning_rate": 2.772744167728879e-06, - "loss": 0.6245, + "epoch": 0.44, + "grad_norm": 1.9478953416822407, + "learning_rate": 6.148585391221597e-06, + "loss": 0.5617, "step": 6249 }, { - "epoch": 0.66, - "grad_norm": 0.94937587598107, - "learning_rate": 2.7712184820679343e-06, - "loss": 0.5034, + "epoch": 0.44, + "grad_norm": 1.63436514829823, + "learning_rate": 6.147466925583725e-06, + "loss": 0.5688, "step": 6250 }, { - "epoch": 0.66, - "grad_norm": 2.750670337900543, - "learning_rate": 2.7696930553403817e-06, - "loss": 0.6075, + "epoch": 0.44, + "grad_norm": 1.645990993281783, + "learning_rate": 6.14634839933276e-06, + "loss": 0.5658, "step": 6251 }, { - "epoch": 0.66, - "grad_norm": 1.8167968242924053, - "learning_rate": 2.7681678877234446e-06, - "loss": 0.567, + "epoch": 0.44, + "grad_norm": 1.9717124058818387, + "learning_rate": 6.145229812527789e-06, + "loss": 0.5166, "step": 6252 }, { - "epoch": 0.66, - "grad_norm": 2.3048720790451163, - "learning_rate": 2.7666429793943087e-06, - "loss": 0.6424, + "epoch": 0.44, + "grad_norm": 1.6890334311952144, + "learning_rate": 6.144111165227899e-06, + "loss": 0.4972, "step": 6253 }, { - "epoch": 0.66, - "grad_norm": 2.978504924873048, - "learning_rate": 2.765118330530138e-06, - "loss": 0.6175, + "epoch": 0.44, + "grad_norm": 1.6392733997739652, + "learning_rate": 6.142992457492181e-06, + "loss": 0.5617, "step": 6254 }, { - "epoch": 0.66, - "grad_norm": 2.9075301291638174, - "learning_rate": 2.763593941308057e-06, - "loss": 0.6043, + "epoch": 0.44, + "grad_norm": 1.6143278914940584, + "learning_rate": 6.141873689379727e-06, + "loss": 0.5735, "step": 6255 }, { - "epoch": 0.66, - "grad_norm": 3.3429392341032482, - "learning_rate": 2.7620698119051687e-06, - "loss": 0.6713, + "epoch": 0.44, + "grad_norm": 1.7310510130857852, + "learning_rate": 6.140754860949637e-06, + "loss": 0.6626, "step": 6256 }, { - "epoch": 0.66, - "grad_norm": 2.077361667669363, - "learning_rate": 2.7605459424985387e-06, - "loss": 0.7026, + "epoch": 0.44, + "grad_norm": 1.749418515672551, + "learning_rate": 6.139635972261009e-06, + "loss": 0.5306, "step": 6257 }, { - "epoch": 0.66, - "grad_norm": 2.5927187596499923, - "learning_rate": 2.7590223332652096e-06, - "loss": 0.6464, + "epoch": 0.44, + "grad_norm": 1.6124828419584367, + "learning_rate": 6.138517023372949e-06, + "loss": 0.5758, "step": 6258 }, { - "epoch": 0.66, - "grad_norm": 2.588598586482582, - "learning_rate": 2.7574989843821855e-06, - "loss": 0.7161, + "epoch": 0.44, + "grad_norm": 2.4289655846329805, + "learning_rate": 6.137398014344559e-06, + "loss": 0.5115, "step": 6259 }, { - "epoch": 0.66, - "grad_norm": 2.9831247521107374, - "learning_rate": 2.7559758960264492e-06, - "loss": 0.6043, + "epoch": 0.44, + "grad_norm": 2.1461138950780114, + "learning_rate": 6.136278945234954e-06, + "loss": 0.4833, "step": 6260 }, { - "epoch": 0.66, - "grad_norm": 13.839156626103529, - "learning_rate": 2.7544530683749447e-06, - "loss": 0.6524, + "epoch": 0.44, + "grad_norm": 3.4970710816669546, + "learning_rate": 6.135159816103243e-06, + "loss": 0.5403, "step": 6261 }, { - "epoch": 0.66, - "grad_norm": 2.302955982405863, - "learning_rate": 2.7529305016045917e-06, - "loss": 0.6315, + "epoch": 0.44, + "grad_norm": 3.3619107183317567, + "learning_rate": 6.134040627008546e-06, + "loss": 0.557, "step": 6262 }, { - "epoch": 0.66, - "grad_norm": 4.745223080871031, - "learning_rate": 2.751408195892279e-06, - "loss": 0.6543, + "epoch": 0.44, + "grad_norm": 1.563213903190383, + "learning_rate": 6.132921378009979e-06, + "loss": 0.5326, "step": 6263 }, { - "epoch": 0.66, - "grad_norm": 3.2451920481026892, - "learning_rate": 2.74988615141486e-06, - "loss": 0.5986, + "epoch": 0.44, + "grad_norm": 1.7780829680616153, + "learning_rate": 6.131802069166668e-06, + "loss": 0.5137, "step": 6264 }, { - "epoch": 0.66, - "grad_norm": 3.1406872121844263, - "learning_rate": 2.7483643683491658e-06, - "loss": 0.5726, + "epoch": 0.44, + "grad_norm": 1.8972372370268282, + "learning_rate": 6.1306827005377346e-06, + "loss": 0.5228, "step": 6265 }, { - "epoch": 0.66, - "grad_norm": 3.484418230228393, - "learning_rate": 2.7468428468719877e-06, - "loss": 0.7162, + "epoch": 0.44, + "grad_norm": 0.8571711819896531, + "learning_rate": 6.129563272182311e-06, + "loss": 0.4409, "step": 6266 }, { - "epoch": 0.66, - "grad_norm": 2.329591736950578, - "learning_rate": 2.7453215871600967e-06, - "loss": 0.5947, + "epoch": 0.44, + "grad_norm": 1.6146407453371268, + "learning_rate": 6.128443784159526e-06, + "loss": 0.5145, "step": 6267 }, { - "epoch": 0.66, - "grad_norm": 2.4045111597868307, - "learning_rate": 2.743800589390225e-06, - "loss": 0.6726, + "epoch": 0.44, + "grad_norm": 0.9411797893544394, + "learning_rate": 6.127324236528519e-06, + "loss": 0.4751, "step": 6268 }, { - "epoch": 0.66, - "grad_norm": 2.3627978273372885, - "learning_rate": 2.74227985373908e-06, - "loss": 0.5535, + "epoch": 0.44, + "grad_norm": 1.7601298166076602, + "learning_rate": 6.126204629348425e-06, + "loss": 0.4879, "step": 6269 }, { - "epoch": 0.66, - "grad_norm": 3.6457501096909266, - "learning_rate": 2.7407593803833333e-06, - "loss": 0.6602, + "epoch": 0.44, + "grad_norm": 1.6247471615189382, + "learning_rate": 6.125084962678385e-06, + "loss": 0.4863, "step": 6270 }, { - "epoch": 0.66, - "grad_norm": 2.926138821288341, - "learning_rate": 2.7392391694996335e-06, - "loss": 0.7381, + "epoch": 0.45, + "grad_norm": 1.7757284189282887, + "learning_rate": 6.123965236577547e-06, + "loss": 0.5948, "step": 6271 }, { - "epoch": 0.66, - "grad_norm": 1.1259023466885845, - "learning_rate": 2.7377192212645888e-06, - "loss": 0.5446, + "epoch": 0.45, + "grad_norm": 1.7007656689665966, + "learning_rate": 6.122845451105055e-06, + "loss": 0.5529, "step": 6272 }, { - "epoch": 0.66, - "grad_norm": 2.231315217630402, - "learning_rate": 2.736199535854788e-06, - "loss": 0.566, + "epoch": 0.45, + "grad_norm": 1.761731132253299, + "learning_rate": 6.121725606320063e-06, + "loss": 0.4897, "step": 6273 }, { - "epoch": 0.66, - "grad_norm": 2.7856102084315983, - "learning_rate": 2.7346801134467794e-06, - "loss": 0.606, + "epoch": 0.45, + "grad_norm": 2.09351514496744, + "learning_rate": 6.120605702281722e-06, + "loss": 0.5533, "step": 6274 }, { - "epoch": 0.66, - "grad_norm": 2.6026849229772218, - "learning_rate": 2.733160954217086e-06, - "loss": 0.6111, + "epoch": 0.45, + "grad_norm": 1.6651256125184892, + "learning_rate": 6.119485739049193e-06, + "loss": 0.5354, "step": 6275 }, { - "epoch": 0.66, - "grad_norm": 2.5134836440676724, - "learning_rate": 2.731642058342203e-06, - "loss": 0.5827, + "epoch": 0.45, + "grad_norm": 0.8518558758012634, + "learning_rate": 6.1183657166816314e-06, + "loss": 0.4636, "step": 6276 }, { - "epoch": 0.66, - "grad_norm": 2.2996578580915323, - "learning_rate": 2.7301234259985863e-06, - "loss": 0.5993, + "epoch": 0.45, + "grad_norm": 1.4462954038825175, + "learning_rate": 6.117245635238204e-06, + "loss": 0.5522, "step": 6277 }, { - "epoch": 0.66, - "grad_norm": 2.937914077737516, - "learning_rate": 2.72860505736267e-06, - "loss": 0.6798, + "epoch": 0.45, + "grad_norm": 2.968619172750841, + "learning_rate": 6.116125494778078e-06, + "loss": 0.5732, "step": 6278 }, { - "epoch": 0.66, - "grad_norm": 2.9983367460524004, - "learning_rate": 2.7270869526108507e-06, - "loss": 0.6152, + "epoch": 0.45, + "grad_norm": 1.6727639050631524, + "learning_rate": 6.11500529536042e-06, + "loss": 0.5132, "step": 6279 }, { - "epoch": 0.66, - "grad_norm": 2.6496495044995605, - "learning_rate": 2.7255691119195005e-06, - "loss": 0.6753, + "epoch": 0.45, + "grad_norm": 1.6731763441805236, + "learning_rate": 6.113885037044405e-06, + "loss": 0.621, "step": 6280 }, { - "epoch": 0.66, - "grad_norm": 2.138925305786583, - "learning_rate": 2.7240515354649545e-06, - "loss": 0.6051, + "epoch": 0.45, + "grad_norm": 1.4705909323859574, + "learning_rate": 6.112764719889206e-06, + "loss": 0.4712, "step": 6281 }, { - "epoch": 0.66, - "grad_norm": 2.490293141221245, - "learning_rate": 2.722534223423524e-06, - "loss": 0.6335, + "epoch": 0.45, + "grad_norm": 1.6383368203352928, + "learning_rate": 6.111644343954005e-06, + "loss": 0.5224, "step": 6282 }, { - "epoch": 0.66, - "grad_norm": 2.322910264522336, - "learning_rate": 2.721017175971482e-06, - "loss": 0.6405, + "epoch": 0.45, + "grad_norm": 1.6077922123191906, + "learning_rate": 6.110523909297984e-06, + "loss": 0.6028, "step": 6283 }, { - "epoch": 0.66, - "grad_norm": 2.643925276634918, - "learning_rate": 2.719500393285076e-06, - "loss": 0.6404, + "epoch": 0.45, + "grad_norm": 2.490090985109898, + "learning_rate": 6.109403415980326e-06, + "loss": 0.5605, "step": 6284 }, { - "epoch": 0.66, - "grad_norm": 2.0080720283021827, - "learning_rate": 2.7179838755405253e-06, - "loss": 0.5701, + "epoch": 0.45, + "grad_norm": 2.4572898598120876, + "learning_rate": 6.108282864060221e-06, + "loss": 0.6022, "step": 6285 }, { - "epoch": 0.66, - "grad_norm": 2.5525745426515476, - "learning_rate": 2.7164676229140098e-06, - "loss": 0.6459, + "epoch": 0.45, + "grad_norm": 1.480338188907449, + "learning_rate": 6.107162253596859e-06, + "loss": 0.4921, "step": 6286 }, { - "epoch": 0.66, - "grad_norm": 2.512640406594001, - "learning_rate": 2.714951635581684e-06, - "loss": 0.6733, + "epoch": 0.45, + "grad_norm": 2.6627672810356384, + "learning_rate": 6.106041584649435e-06, + "loss": 0.5393, "step": 6287 }, { - "epoch": 0.66, - "grad_norm": 0.9599284170567002, - "learning_rate": 2.713435913719671e-06, - "loss": 0.5056, + "epoch": 0.45, + "grad_norm": 0.7593494144629737, + "learning_rate": 6.104920857277147e-06, + "loss": 0.4462, "step": 6288 }, { - "epoch": 0.66, - "grad_norm": 2.2612027984032435, - "learning_rate": 2.7119204575040666e-06, - "loss": 0.6296, + "epoch": 0.45, + "grad_norm": 2.594381554695951, + "learning_rate": 6.103800071539196e-06, + "loss": 0.4865, "step": 6289 }, { - "epoch": 0.66, - "grad_norm": 2.856455315447502, - "learning_rate": 2.7104052671109267e-06, - "loss": 0.6801, + "epoch": 0.45, + "grad_norm": 1.6960217135075029, + "learning_rate": 6.102679227494785e-06, + "loss": 0.6087, "step": 6290 }, { - "epoch": 0.66, - "grad_norm": 2.527783580967457, - "learning_rate": 2.708890342716286e-06, - "loss": 0.5643, + "epoch": 0.45, + "grad_norm": 1.6747573741025117, + "learning_rate": 6.10155832520312e-06, + "loss": 0.5259, "step": 6291 }, { - "epoch": 0.66, - "grad_norm": 3.314451080373654, - "learning_rate": 2.7073756844961407e-06, - "loss": 0.6189, + "epoch": 0.45, + "grad_norm": 2.205040167932142, + "learning_rate": 6.100437364723412e-06, + "loss": 0.5446, "step": 6292 }, { - "epoch": 0.66, - "grad_norm": 2.232485359666232, - "learning_rate": 2.7058612926264634e-06, - "loss": 0.6106, + "epoch": 0.45, + "grad_norm": 1.80616112561425, + "learning_rate": 6.099316346114874e-06, + "loss": 0.5636, "step": 6293 }, { - "epoch": 0.66, - "grad_norm": 1.9749775537277983, - "learning_rate": 2.7043471672831866e-06, - "loss": 0.597, + "epoch": 0.45, + "grad_norm": 1.4748848929800007, + "learning_rate": 6.098195269436722e-06, + "loss": 0.5092, "step": 6294 }, { - "epoch": 0.66, - "grad_norm": 2.570926833062, - "learning_rate": 2.7028333086422232e-06, - "loss": 0.5325, + "epoch": 0.45, + "grad_norm": 0.7488479915985337, + "learning_rate": 6.097074134748174e-06, + "loss": 0.4682, "step": 6295 }, { - "epoch": 0.66, - "grad_norm": 2.58096398136124, - "learning_rate": 2.7013197168794424e-06, - "loss": 0.6335, + "epoch": 0.45, + "grad_norm": 2.3134720416105514, + "learning_rate": 6.095952942108453e-06, + "loss": 0.4841, "step": 6296 }, { - "epoch": 0.66, - "grad_norm": 2.1566887676933293, - "learning_rate": 2.699806392170693e-06, - "loss": 0.6744, + "epoch": 0.45, + "grad_norm": 1.904022322153139, + "learning_rate": 6.094831691576783e-06, + "loss": 0.5963, "step": 6297 }, { - "epoch": 0.66, - "grad_norm": 2.2365763878000284, - "learning_rate": 2.698293334691789e-06, - "loss": 0.6178, + "epoch": 0.45, + "grad_norm": 4.296837633753355, + "learning_rate": 6.093710383212396e-06, + "loss": 0.4625, "step": 6298 }, { - "epoch": 0.66, - "grad_norm": 5.206253358431679, - "learning_rate": 2.69678054461851e-06, - "loss": 0.6438, + "epoch": 0.45, + "grad_norm": 1.824140123248322, + "learning_rate": 6.092589017074517e-06, + "loss": 0.5638, "step": 6299 }, { - "epoch": 0.66, - "grad_norm": 2.4308841962891514, - "learning_rate": 2.6952680221266116e-06, - "loss": 0.6907, + "epoch": 0.45, + "grad_norm": 1.6296849796776893, + "learning_rate": 6.0914675932223865e-06, + "loss": 0.5354, "step": 6300 }, { - "epoch": 0.66, - "grad_norm": 2.1155846785270302, - "learning_rate": 2.6937557673918096e-06, - "loss": 0.5513, + "epoch": 0.45, + "grad_norm": 1.7669937105130415, + "learning_rate": 6.090346111715239e-06, + "loss": 0.5216, "step": 6301 }, { - "epoch": 0.66, - "grad_norm": 2.102074075556992, - "learning_rate": 2.692243780589798e-06, - "loss": 0.6146, + "epoch": 0.45, + "grad_norm": 2.176589261455902, + "learning_rate": 6.089224572612313e-06, + "loss": 0.4818, "step": 6302 }, { - "epoch": 0.66, - "grad_norm": 5.119358235355663, - "learning_rate": 2.6907320618962312e-06, - "loss": 0.6069, + "epoch": 0.45, + "grad_norm": 2.0053015185415046, + "learning_rate": 6.088102975972856e-06, + "loss": 0.5304, "step": 6303 }, { - "epoch": 0.66, - "grad_norm": 2.52908697678268, - "learning_rate": 2.6892206114867402e-06, - "loss": 0.5663, + "epoch": 0.45, + "grad_norm": 2.488078859494076, + "learning_rate": 6.086981321856112e-06, + "loss": 0.5887, "step": 6304 }, { - "epoch": 0.66, - "grad_norm": 2.6044059425378716, - "learning_rate": 2.6877094295369167e-06, - "loss": 0.5864, + "epoch": 0.45, + "grad_norm": 1.840681360537423, + "learning_rate": 6.085859610321334e-06, + "loss": 0.5456, "step": 6305 }, { - "epoch": 0.66, - "grad_norm": 2.3129237260574933, - "learning_rate": 2.686198516222329e-06, - "loss": 0.6366, + "epoch": 0.45, + "grad_norm": 1.9257712119036055, + "learning_rate": 6.08473784142777e-06, + "loss": 0.5211, "step": 6306 }, { - "epoch": 0.66, - "grad_norm": 2.699401934868816, - "learning_rate": 2.6846878717185076e-06, - "loss": 0.5805, + "epoch": 0.45, + "grad_norm": 1.5997691107744978, + "learning_rate": 6.083616015234678e-06, + "loss": 0.5761, "step": 6307 }, { - "epoch": 0.66, - "grad_norm": 2.5503693982830806, - "learning_rate": 2.6831774962009582e-06, - "loss": 0.5873, + "epoch": 0.45, + "grad_norm": 1.6740624204383638, + "learning_rate": 6.082494131801315e-06, + "loss": 0.5651, "step": 6308 }, { - "epoch": 0.66, - "grad_norm": 2.3528557272139152, - "learning_rate": 2.6816673898451486e-06, - "loss": 0.6644, + "epoch": 0.45, + "grad_norm": 1.5847670008909105, + "learning_rate": 6.081372191186947e-06, + "loss": 0.5836, "step": 6309 }, { - "epoch": 0.66, - "grad_norm": 2.178316075395149, - "learning_rate": 2.680157552826519e-06, - "loss": 0.5794, + "epoch": 0.45, + "grad_norm": 1.5698393107218398, + "learning_rate": 6.0802501934508325e-06, + "loss": 0.5637, "step": 6310 }, { - "epoch": 0.66, - "grad_norm": 2.640485918878129, - "learning_rate": 2.6786479853204817e-06, - "loss": 0.7067, + "epoch": 0.45, + "grad_norm": 0.7372095157641763, + "learning_rate": 6.0791281386522435e-06, + "loss": 0.4823, "step": 6311 }, { - "epoch": 0.66, - "grad_norm": 2.296387374049739, - "learning_rate": 2.6771386875024087e-06, - "loss": 0.6266, + "epoch": 0.45, + "grad_norm": 1.6401589205168723, + "learning_rate": 6.07800602685045e-06, + "loss": 0.5839, "step": 6312 }, { - "epoch": 0.66, - "grad_norm": 2.1896654863553193, - "learning_rate": 2.6756296595476504e-06, - "loss": 0.5763, + "epoch": 0.45, + "grad_norm": 1.932359912596974, + "learning_rate": 6.076883858104725e-06, + "loss": 0.6, "step": 6313 }, { - "epoch": 0.66, - "grad_norm": 2.110961387612748, - "learning_rate": 2.674120901631517e-06, - "loss": 0.5546, + "epoch": 0.45, + "grad_norm": 1.6469076935301972, + "learning_rate": 6.075761632474346e-06, + "loss": 0.5111, "step": 6314 }, { - "epoch": 0.66, - "grad_norm": 3.028470459580835, - "learning_rate": 2.6726124139292964e-06, - "loss": 0.59, + "epoch": 0.45, + "grad_norm": 1.86445809810398, + "learning_rate": 6.074639350018593e-06, + "loss": 0.5258, "step": 6315 }, { - "epoch": 0.66, - "grad_norm": 2.3573162744903935, - "learning_rate": 2.6711041966162356e-06, - "loss": 0.638, + "epoch": 0.45, + "grad_norm": 2.1040131736615444, + "learning_rate": 6.073517010796746e-06, + "loss": 0.515, "step": 6316 }, { - "epoch": 0.66, - "grad_norm": 2.738834086837961, - "learning_rate": 2.6695962498675588e-06, - "loss": 0.6098, + "epoch": 0.45, + "grad_norm": 1.791324308720816, + "learning_rate": 6.072394614868094e-06, + "loss": 0.5708, "step": 6317 }, { - "epoch": 0.66, - "grad_norm": 2.3705691571110727, - "learning_rate": 2.6680885738584512e-06, - "loss": 0.6687, + "epoch": 0.45, + "grad_norm": 1.5879006715263964, + "learning_rate": 6.071272162291926e-06, + "loss": 0.5034, "step": 6318 }, { - "epoch": 0.66, - "grad_norm": 5.548261604744699, - "learning_rate": 2.6665811687640723e-06, - "loss": 0.6152, + "epoch": 0.45, + "grad_norm": 1.5722626614320403, + "learning_rate": 6.07014965312753e-06, + "loss": 0.5331, "step": 6319 }, { - "epoch": 0.67, - "grad_norm": 2.3601953378527485, - "learning_rate": 2.66507403475955e-06, - "loss": 0.5781, + "epoch": 0.45, + "grad_norm": 1.662804579069011, + "learning_rate": 6.069027087434205e-06, + "loss": 0.6106, "step": 6320 }, { - "epoch": 0.67, - "grad_norm": 2.7282332847963287, - "learning_rate": 2.663567172019977e-06, - "loss": 0.6588, + "epoch": 0.45, + "grad_norm": 1.5399928426239344, + "learning_rate": 6.067904465271246e-06, + "loss": 0.5227, "step": 6321 }, { - "epoch": 0.67, - "grad_norm": 3.647581964093641, - "learning_rate": 2.6620605807204134e-06, - "loss": 0.5947, + "epoch": 0.45, + "grad_norm": 1.3013792198802103, + "learning_rate": 6.066781786697956e-06, + "loss": 0.4858, "step": 6322 }, { - "epoch": 0.67, - "grad_norm": 2.55204790802501, - "learning_rate": 2.660554261035894e-06, - "loss": 0.6317, + "epoch": 0.45, + "grad_norm": 1.796472118638848, + "learning_rate": 6.065659051773636e-06, + "loss": 0.4609, "step": 6323 }, { - "epoch": 0.67, - "grad_norm": 2.794476567030957, - "learning_rate": 2.659048213141419e-06, - "loss": 0.5403, + "epoch": 0.45, + "grad_norm": 1.9458156447020258, + "learning_rate": 6.064536260557595e-06, + "loss": 0.4412, "step": 6324 }, { - "epoch": 0.67, - "grad_norm": 2.5186567163194873, - "learning_rate": 2.657542437211954e-06, - "loss": 0.6475, + "epoch": 0.45, + "grad_norm": 0.6496237951436457, + "learning_rate": 6.063413413109141e-06, + "loss": 0.4418, "step": 6325 }, { - "epoch": 0.67, - "grad_norm": 2.715952090453819, - "learning_rate": 2.6560369334224396e-06, - "loss": 0.565, + "epoch": 0.45, + "grad_norm": 1.8212255812459504, + "learning_rate": 6.062290509487586e-06, + "loss": 0.4879, "step": 6326 }, { - "epoch": 0.67, - "grad_norm": 2.700955615524671, - "learning_rate": 2.6545317019477764e-06, - "loss": 0.6937, + "epoch": 0.45, + "grad_norm": 1.6328838466348194, + "learning_rate": 6.061167549752247e-06, + "loss": 0.5199, "step": 6327 }, { - "epoch": 0.67, - "grad_norm": 3.7950586784557565, - "learning_rate": 2.653026742962842e-06, - "loss": 0.6282, + "epoch": 0.45, + "grad_norm": 1.5251049366927516, + "learning_rate": 6.060044533962444e-06, + "loss": 0.5383, "step": 6328 }, { - "epoch": 0.67, - "grad_norm": 1.932271397052837, - "learning_rate": 2.6515220566424735e-06, - "loss": 0.5145, + "epoch": 0.45, + "grad_norm": 1.7937667289398975, + "learning_rate": 6.0589214621774964e-06, + "loss": 0.5383, "step": 6329 }, { - "epoch": 0.67, - "grad_norm": 2.426433667015017, - "learning_rate": 2.6500176431614866e-06, - "loss": 0.6701, + "epoch": 0.45, + "grad_norm": 1.6970897671626783, + "learning_rate": 6.0577983344567286e-06, + "loss": 0.5787, "step": 6330 }, { - "epoch": 0.67, - "grad_norm": 2.267463500168662, - "learning_rate": 2.6485135026946545e-06, - "loss": 0.62, + "epoch": 0.45, + "grad_norm": 2.0208213777286996, + "learning_rate": 6.056675150859468e-06, + "loss": 0.5895, "step": 6331 }, { - "epoch": 0.67, - "grad_norm": 2.4952915986161748, - "learning_rate": 2.6470096354167264e-06, - "loss": 0.6529, + "epoch": 0.45, + "grad_norm": 1.9150070801513752, + "learning_rate": 6.055551911445045e-06, + "loss": 0.4711, "step": 6332 }, { - "epoch": 0.67, - "grad_norm": 4.28610014397335, - "learning_rate": 2.645506041502419e-06, - "loss": 0.5839, + "epoch": 0.45, + "grad_norm": 1.9756203576750297, + "learning_rate": 6.054428616272794e-06, + "loss": 0.5576, "step": 6333 }, { - "epoch": 0.67, - "grad_norm": 2.8188123888897585, - "learning_rate": 2.644002721126413e-06, - "loss": 0.6255, + "epoch": 0.45, + "grad_norm": 1.7700714709535976, + "learning_rate": 6.053305265402049e-06, + "loss": 0.5589, "step": 6334 }, { - "epoch": 0.67, - "grad_norm": 3.831571559662532, - "learning_rate": 2.642499674463359e-06, - "loss": 0.536, + "epoch": 0.45, + "grad_norm": 1.738138287283359, + "learning_rate": 6.052181858892155e-06, + "loss": 0.568, "step": 6335 }, { - "epoch": 0.67, - "grad_norm": 2.58837692606335, - "learning_rate": 2.640996901687878e-06, - "loss": 0.6061, + "epoch": 0.45, + "grad_norm": 1.4521938960582153, + "learning_rate": 6.051058396802446e-06, + "loss": 0.4519, "step": 6336 }, { - "epoch": 0.67, - "grad_norm": 2.3253447670569956, - "learning_rate": 2.6394944029745594e-06, - "loss": 0.6083, + "epoch": 0.45, + "grad_norm": 2.167688616087516, + "learning_rate": 6.049934879192274e-06, + "loss": 0.5401, "step": 6337 }, { - "epoch": 0.67, - "grad_norm": 3.1161500571551355, - "learning_rate": 2.6379921784979556e-06, - "loss": 0.6626, + "epoch": 0.45, + "grad_norm": 2.8992867114857983, + "learning_rate": 6.048811306120982e-06, + "loss": 0.5117, "step": 6338 }, { - "epoch": 0.67, - "grad_norm": 2.3957780596036167, - "learning_rate": 2.6364902284325943e-06, - "loss": 0.5869, + "epoch": 0.45, + "grad_norm": 1.8519241186657522, + "learning_rate": 6.047687677647924e-06, + "loss": 0.6114, "step": 6339 }, { - "epoch": 0.67, - "grad_norm": 3.14685635706374, - "learning_rate": 2.634988552952965e-06, - "loss": 0.6182, + "epoch": 0.45, + "grad_norm": 0.8043450710659489, + "learning_rate": 6.04656399383245e-06, + "loss": 0.4283, "step": 6340 }, { - "epoch": 0.67, - "grad_norm": 6.175501966326075, - "learning_rate": 2.63348715223353e-06, - "loss": 0.5976, + "epoch": 0.45, + "grad_norm": 1.7820317145615474, + "learning_rate": 6.045440254733923e-06, + "loss": 0.5933, "step": 6341 }, { - "epoch": 0.67, - "grad_norm": 3.2865872103188165, - "learning_rate": 2.6319860264487156e-06, - "loss": 0.6499, + "epoch": 0.45, + "grad_norm": 1.4899686775862293, + "learning_rate": 6.044316460411698e-06, + "loss": 0.5426, "step": 6342 }, { - "epoch": 0.67, - "grad_norm": 4.282251344333547, - "learning_rate": 2.630485175772921e-06, - "loss": 0.6299, + "epoch": 0.45, + "grad_norm": 1.6887543426235077, + "learning_rate": 6.04319261092514e-06, + "loss": 0.5731, "step": 6343 }, { - "epoch": 0.67, - "grad_norm": 2.959066483609487, - "learning_rate": 2.6289846003805073e-06, - "loss": 0.7163, + "epoch": 0.45, + "grad_norm": 1.5139654881037894, + "learning_rate": 6.0420687063336115e-06, + "loss": 0.5528, "step": 6344 }, { - "epoch": 0.67, - "grad_norm": 2.6533855270391533, - "learning_rate": 2.6274843004458083e-06, - "loss": 0.6091, + "epoch": 0.45, + "grad_norm": 1.45012442475435, + "learning_rate": 6.040944746696484e-06, + "loss": 0.559, "step": 6345 }, { - "epoch": 0.67, - "grad_norm": 2.092500503413888, - "learning_rate": 2.6259842761431275e-06, - "loss": 0.6512, + "epoch": 0.45, + "grad_norm": 1.8451289691944497, + "learning_rate": 6.039820732073128e-06, + "loss": 0.5006, "step": 6346 }, { - "epoch": 0.67, - "grad_norm": 11.63033721500452, - "learning_rate": 2.624484527646729e-06, - "loss": 0.5766, + "epoch": 0.45, + "grad_norm": 2.2048477445393, + "learning_rate": 6.038696662522917e-06, + "loss": 0.5922, "step": 6347 }, { - "epoch": 0.67, - "grad_norm": 2.344316205856234, - "learning_rate": 2.6229850551308533e-06, - "loss": 0.6893, + "epoch": 0.45, + "grad_norm": 1.5770085826218494, + "learning_rate": 6.037572538105228e-06, + "loss": 0.5098, "step": 6348 }, { - "epoch": 0.67, - "grad_norm": 2.7670480341648984, - "learning_rate": 2.6214858587697e-06, - "loss": 0.6076, + "epoch": 0.45, + "grad_norm": 1.7269173318399798, + "learning_rate": 6.0364483588794445e-06, + "loss": 0.5359, "step": 6349 }, { - "epoch": 0.67, - "grad_norm": 2.7552634595866894, - "learning_rate": 2.6199869387374465e-06, - "loss": 0.626, + "epoch": 0.45, + "grad_norm": 1.9730618275933753, + "learning_rate": 6.035324124904944e-06, + "loss": 0.5854, "step": 6350 }, { - "epoch": 0.67, - "grad_norm": 2.087916955904863, - "learning_rate": 2.6184882952082286e-06, - "loss": 0.6331, + "epoch": 0.45, + "grad_norm": 1.7972258596981647, + "learning_rate": 6.034199836241116e-06, + "loss": 0.5806, "step": 6351 }, { - "epoch": 0.67, - "grad_norm": 2.2553742098034797, - "learning_rate": 2.616989928356158e-06, - "loss": 0.6029, + "epoch": 0.45, + "grad_norm": 2.81097451051854, + "learning_rate": 6.033075492947349e-06, + "loss": 0.519, "step": 6352 }, { - "epoch": 0.67, - "grad_norm": 2.4877624793792266, - "learning_rate": 2.6154918383553075e-06, - "loss": 0.5829, + "epoch": 0.45, + "grad_norm": 1.6040128263423006, + "learning_rate": 6.031951095083033e-06, + "loss": 0.5337, "step": 6353 }, { - "epoch": 0.67, - "grad_norm": 3.164663373664098, - "learning_rate": 2.6139940253797237e-06, - "loss": 0.5857, + "epoch": 0.45, + "grad_norm": 1.5692517052292783, + "learning_rate": 6.030826642707564e-06, + "loss": 0.4745, "step": 6354 }, { - "epoch": 0.67, - "grad_norm": 2.2967240401034346, - "learning_rate": 2.6124964896034143e-06, - "loss": 0.5886, + "epoch": 0.45, + "grad_norm": 1.5214691951475245, + "learning_rate": 6.029702135880337e-06, + "loss": 0.5143, "step": 6355 }, { - "epoch": 0.67, - "grad_norm": 4.099408237564359, - "learning_rate": 2.610999231200364e-06, - "loss": 0.6851, + "epoch": 0.45, + "grad_norm": 0.734691259803567, + "learning_rate": 6.028577574660756e-06, + "loss": 0.4727, "step": 6356 }, { - "epoch": 0.67, - "grad_norm": 2.3689665901694243, - "learning_rate": 2.6095022503445155e-06, - "loss": 0.6386, + "epoch": 0.45, + "grad_norm": 1.7164708405942686, + "learning_rate": 6.027452959108222e-06, + "loss": 0.4784, "step": 6357 }, { - "epoch": 0.67, - "grad_norm": 2.2222788180650475, - "learning_rate": 2.6080055472097844e-06, - "loss": 0.638, + "epoch": 0.45, + "grad_norm": 2.4412936711761835, + "learning_rate": 6.026328289282141e-06, + "loss": 0.5755, "step": 6358 }, { - "epoch": 0.67, - "grad_norm": 2.4258550761659854, - "learning_rate": 2.6065091219700568e-06, - "loss": 0.5755, + "epoch": 0.45, + "grad_norm": 1.8882605839140438, + "learning_rate": 6.025203565241922e-06, + "loss": 0.6094, "step": 6359 }, { - "epoch": 0.67, - "grad_norm": 2.5822158507581823, - "learning_rate": 2.605012974799178e-06, - "loss": 0.6109, + "epoch": 0.45, + "grad_norm": 1.9902520516716906, + "learning_rate": 6.024078787046979e-06, + "loss": 0.5202, "step": 6360 }, { - "epoch": 0.67, - "grad_norm": 2.344258642918491, - "learning_rate": 2.603517105870971e-06, - "loss": 0.5318, + "epoch": 0.45, + "grad_norm": 1.6659585066847027, + "learning_rate": 6.022953954756721e-06, + "loss": 0.5709, "step": 6361 }, { - "epoch": 0.67, - "grad_norm": 3.661742078883693, - "learning_rate": 2.602021515359218e-06, - "loss": 0.5598, + "epoch": 0.45, + "grad_norm": 1.4692642953293527, + "learning_rate": 6.021829068430569e-06, + "loss": 0.5096, "step": 6362 }, { - "epoch": 0.67, - "grad_norm": 2.2545924179160295, - "learning_rate": 2.600526203437674e-06, - "loss": 0.5684, + "epoch": 0.45, + "grad_norm": 1.6144990941518313, + "learning_rate": 6.020704128127945e-06, + "loss": 0.5332, "step": 6363 }, { - "epoch": 0.67, - "grad_norm": 2.7470556591551687, - "learning_rate": 2.5990311702800573e-06, - "loss": 0.6562, + "epoch": 0.45, + "grad_norm": 1.5638504051059585, + "learning_rate": 6.0195791339082696e-06, + "loss": 0.4656, "step": 6364 }, { - "epoch": 0.67, - "grad_norm": 2.130529569635912, - "learning_rate": 2.597536416060062e-06, - "loss": 0.6661, + "epoch": 0.45, + "grad_norm": 1.7115928554717594, + "learning_rate": 6.018454085830969e-06, + "loss": 0.4672, "step": 6365 }, { - "epoch": 0.67, - "grad_norm": 2.848914985108963, - "learning_rate": 2.5960419409513386e-06, - "loss": 0.4984, + "epoch": 0.45, + "grad_norm": 6.080648547201388, + "learning_rate": 6.017328983955475e-06, + "loss": 0.5645, "step": 6366 }, { - "epoch": 0.67, - "grad_norm": 5.47331013855302, - "learning_rate": 2.594547745127514e-06, - "loss": 0.6535, + "epoch": 0.45, + "grad_norm": 2.0484814546379595, + "learning_rate": 6.016203828341214e-06, + "loss": 0.6072, "step": 6367 }, { - "epoch": 0.67, - "grad_norm": 2.1487059851875383, - "learning_rate": 2.5930538287621797e-06, - "loss": 0.6188, + "epoch": 0.45, + "grad_norm": 1.471978090879093, + "learning_rate": 6.015078619047625e-06, + "loss": 0.459, "step": 6368 }, { - "epoch": 0.67, - "grad_norm": 2.2329154737791517, - "learning_rate": 2.591560192028894e-06, - "loss": 0.5626, + "epoch": 0.45, + "grad_norm": 2.3633706889560853, + "learning_rate": 6.013953356134142e-06, + "loss": 0.5459, "step": 6369 }, { - "epoch": 0.67, - "grad_norm": 2.2654061918388013, - "learning_rate": 2.5900668351011815e-06, - "loss": 0.6215, + "epoch": 0.45, + "grad_norm": 2.1774102808482394, + "learning_rate": 6.012828039660208e-06, + "loss": 0.5975, "step": 6370 }, { - "epoch": 0.67, - "grad_norm": 2.256650550756588, - "learning_rate": 2.588573758152538e-06, - "loss": 0.631, + "epoch": 0.45, + "grad_norm": 1.5005848097500631, + "learning_rate": 6.011702669685266e-06, + "loss": 0.4591, "step": 6371 }, { - "epoch": 0.67, - "grad_norm": 2.001940811368443, - "learning_rate": 2.5870809613564264e-06, - "loss": 0.615, + "epoch": 0.45, + "grad_norm": 1.8356678940819122, + "learning_rate": 6.0105772462687605e-06, + "loss": 0.5203, "step": 6372 }, { - "epoch": 0.67, - "grad_norm": 5.097482873027538, - "learning_rate": 2.585588444886271e-06, - "loss": 0.6168, + "epoch": 0.45, + "grad_norm": 1.581454818676609, + "learning_rate": 6.00945176947014e-06, + "loss": 0.5355, "step": 6373 }, { - "epoch": 0.67, - "grad_norm": 2.401718053393286, - "learning_rate": 2.584096208915473e-06, - "loss": 0.6036, + "epoch": 0.45, + "grad_norm": 1.6336609191507736, + "learning_rate": 6.008326239348857e-06, + "loss": 0.6264, "step": 6374 }, { - "epoch": 0.67, - "grad_norm": 2.7170902456231816, - "learning_rate": 2.5826042536173923e-06, - "loss": 0.6435, + "epoch": 0.45, + "grad_norm": 1.914035863841177, + "learning_rate": 6.007200655964366e-06, + "loss": 0.5173, "step": 6375 }, { - "epoch": 0.67, - "grad_norm": 2.0656142552031445, - "learning_rate": 2.581112579165363e-06, - "loss": 0.6798, + "epoch": 0.45, + "grad_norm": 1.9260479448436225, + "learning_rate": 6.006075019376122e-06, + "loss": 0.6137, "step": 6376 }, { - "epoch": 0.67, - "grad_norm": 2.885892388490017, - "learning_rate": 2.5796211857326805e-06, - "loss": 0.5807, + "epoch": 0.45, + "grad_norm": 1.9273434159181793, + "learning_rate": 6.004949329643587e-06, + "loss": 0.5852, "step": 6377 }, { - "epoch": 0.67, - "grad_norm": 2.357443586015753, - "learning_rate": 2.578130073492613e-06, - "loss": 0.5938, + "epoch": 0.45, + "grad_norm": 1.4489679471444523, + "learning_rate": 6.003823586826223e-06, + "loss": 0.5074, "step": 6378 }, { - "epoch": 0.67, - "grad_norm": 2.3457746446640377, - "learning_rate": 2.576639242618391e-06, - "loss": 0.6046, + "epoch": 0.45, + "grad_norm": 1.6810635269129035, + "learning_rate": 6.002697790983496e-06, + "loss": 0.4575, "step": 6379 }, { - "epoch": 0.67, - "grad_norm": 1.0130372996055184, - "learning_rate": 2.575148693283217e-06, - "loss": 0.5469, + "epoch": 0.45, + "grad_norm": 1.8108569238987724, + "learning_rate": 6.0015719421748745e-06, + "loss": 0.5, "step": 6380 }, { - "epoch": 0.67, - "grad_norm": 2.2471983613829685, - "learning_rate": 2.5736584256602604e-06, - "loss": 0.6914, + "epoch": 0.45, + "grad_norm": 1.78871080938493, + "learning_rate": 6.000446040459828e-06, + "loss": 0.5339, "step": 6381 }, { - "epoch": 0.67, - "grad_norm": 2.8049842929766076, - "learning_rate": 2.572168439922653e-06, - "loss": 0.6207, + "epoch": 0.45, + "grad_norm": 1.6068370487258503, + "learning_rate": 5.999320085897833e-06, + "loss": 0.5133, "step": 6382 }, { - "epoch": 0.67, - "grad_norm": 2.5242050580331536, - "learning_rate": 2.570678736243497e-06, - "loss": 0.5769, + "epoch": 0.45, + "grad_norm": 1.8735702384117092, + "learning_rate": 5.9981940785483635e-06, + "loss": 0.5522, "step": 6383 }, { - "epoch": 0.67, - "grad_norm": 2.7371668921754995, - "learning_rate": 2.569189314795863e-06, - "loss": 0.6394, + "epoch": 0.45, + "grad_norm": 1.8079265609944464, + "learning_rate": 5.997068018470902e-06, + "loss": 0.5622, "step": 6384 }, { - "epoch": 0.67, - "grad_norm": 2.541518894090096, - "learning_rate": 2.56770017575279e-06, - "loss": 0.5863, + "epoch": 0.45, + "grad_norm": 1.780889254395183, + "learning_rate": 5.9959419057249276e-06, + "loss": 0.4697, "step": 6385 }, { - "epoch": 0.67, - "grad_norm": 2.104105510648952, - "learning_rate": 2.566211319287276e-06, - "loss": 0.5289, + "epoch": 0.45, + "grad_norm": 1.9088008489846524, + "learning_rate": 5.9948157403699295e-06, + "loss": 0.5964, "step": 6386 }, { - "epoch": 0.67, - "grad_norm": 2.5996518622471667, - "learning_rate": 2.564722745572299e-06, - "loss": 0.6135, + "epoch": 0.45, + "grad_norm": 1.8044304644326736, + "learning_rate": 5.99368952246539e-06, + "loss": 0.529, "step": 6387 }, { - "epoch": 0.67, - "grad_norm": 2.2459414237155215, - "learning_rate": 2.563234454780791e-06, - "loss": 0.5344, + "epoch": 0.45, + "grad_norm": 2.2119266590313384, + "learning_rate": 5.992563252070805e-06, + "loss": 0.5228, "step": 6388 }, { - "epoch": 0.67, - "grad_norm": 2.6467790790692405, - "learning_rate": 2.561746447085662e-06, - "loss": 0.5519, + "epoch": 0.45, + "grad_norm": 1.8190107729215494, + "learning_rate": 5.991436929245665e-06, + "loss": 0.5994, "step": 6389 }, { - "epoch": 0.67, - "grad_norm": 2.9589463310028505, - "learning_rate": 2.5602587226597813e-06, - "loss": 0.61, + "epoch": 0.45, + "grad_norm": 1.6999338400312642, + "learning_rate": 5.990310554049468e-06, + "loss": 0.6356, "step": 6390 }, { - "epoch": 0.67, - "grad_norm": 2.81740329183419, - "learning_rate": 2.5587712816759914e-06, - "loss": 0.6797, + "epoch": 0.45, + "grad_norm": 1.8751631482818127, + "learning_rate": 5.98918412654171e-06, + "loss": 0.5868, "step": 6391 }, { - "epoch": 0.67, - "grad_norm": 2.4235942299105417, - "learning_rate": 2.5572841243070944e-06, - "loss": 0.6292, + "epoch": 0.45, + "grad_norm": 1.7434120009414837, + "learning_rate": 5.988057646781896e-06, + "loss": 0.5519, "step": 6392 }, { - "epoch": 0.67, - "grad_norm": 2.7257768033845178, - "learning_rate": 2.5557972507258676e-06, - "loss": 0.5951, + "epoch": 0.45, + "grad_norm": 1.9212804413581503, + "learning_rate": 5.986931114829528e-06, + "loss": 0.5263, "step": 6393 }, { - "epoch": 0.67, - "grad_norm": 2.9980654800137065, - "learning_rate": 2.554310661105052e-06, - "loss": 0.5879, + "epoch": 0.45, + "grad_norm": 3.4072162119756433, + "learning_rate": 5.9858045307441155e-06, + "loss": 0.5127, "step": 6394 }, { - "epoch": 0.67, - "grad_norm": 2.45714129335474, - "learning_rate": 2.5528243556173526e-06, - "loss": 0.5924, + "epoch": 0.45, + "grad_norm": 1.8318166137598089, + "learning_rate": 5.984677894585167e-06, + "loss": 0.5263, "step": 6395 }, { - "epoch": 0.67, - "grad_norm": 2.524381334524273, - "learning_rate": 2.551338334435447e-06, - "loss": 0.5851, + "epoch": 0.45, + "grad_norm": 0.7935450901203088, + "learning_rate": 5.983551206412196e-06, + "loss": 0.4322, "step": 6396 }, { - "epoch": 0.67, - "grad_norm": 2.2895599260578168, - "learning_rate": 2.549852597731973e-06, - "loss": 0.6139, + "epoch": 0.45, + "grad_norm": 2.4268643914625847, + "learning_rate": 5.982424466284717e-06, + "loss": 0.58, "step": 6397 }, { - "epoch": 0.67, - "grad_norm": 2.2734502037433173, - "learning_rate": 2.5483671456795446e-06, - "loss": 0.5641, + "epoch": 0.45, + "grad_norm": 2.9115997839143635, + "learning_rate": 5.981297674262249e-06, + "loss": 0.4982, "step": 6398 }, { - "epoch": 0.67, - "grad_norm": 4.10732024204948, - "learning_rate": 2.5468819784507314e-06, - "loss": 0.6338, + "epoch": 0.45, + "grad_norm": 2.0955492552091592, + "learning_rate": 5.980170830404312e-06, + "loss": 0.5549, "step": 6399 }, { - "epoch": 0.67, - "grad_norm": 2.831440417203488, - "learning_rate": 2.545397096218081e-06, - "loss": 0.632, + "epoch": 0.45, + "grad_norm": 2.055557865680107, + "learning_rate": 5.97904393477043e-06, + "loss": 0.6038, "step": 6400 }, { - "epoch": 0.67, - "grad_norm": 2.2175477294639503, - "learning_rate": 2.5439124991540986e-06, - "loss": 0.5228, + "epoch": 0.45, + "grad_norm": 0.715264243646855, + "learning_rate": 5.977916987420132e-06, + "loss": 0.4559, "step": 6401 }, { - "epoch": 0.67, - "grad_norm": 11.26293371493835, - "learning_rate": 2.5424281874312616e-06, - "loss": 0.638, + "epoch": 0.45, + "grad_norm": 4.113281320147073, + "learning_rate": 5.976789988412944e-06, + "loss": 0.5746, "step": 6402 }, { - "epoch": 0.67, - "grad_norm": 3.070512413502359, - "learning_rate": 2.5409441612220163e-06, - "loss": 0.5331, + "epoch": 0.45, + "grad_norm": 1.807767601593898, + "learning_rate": 5.975662937808399e-06, + "loss": 0.5502, "step": 6403 }, { - "epoch": 0.67, - "grad_norm": 2.43982362945709, - "learning_rate": 2.53946042069877e-06, - "loss": 0.682, + "epoch": 0.45, + "grad_norm": 1.7918910403352066, + "learning_rate": 5.974535835666031e-06, + "loss": 0.5638, "step": 6404 }, { - "epoch": 0.67, - "grad_norm": 2.5481585409694536, - "learning_rate": 2.537976966033897e-06, - "loss": 0.6913, + "epoch": 0.45, + "grad_norm": 1.9171503401807197, + "learning_rate": 5.97340868204538e-06, + "loss": 0.5317, "step": 6405 }, { - "epoch": 0.67, - "grad_norm": 2.780707259096246, - "learning_rate": 2.5364937973997433e-06, - "loss": 0.637, + "epoch": 0.45, + "grad_norm": 2.0244101954307583, + "learning_rate": 5.972281477005981e-06, + "loss": 0.5684, "step": 6406 }, { - "epoch": 0.67, - "grad_norm": 3.059638272022161, - "learning_rate": 2.535010914968621e-06, - "loss": 0.6197, + "epoch": 0.45, + "grad_norm": 1.9052062480536094, + "learning_rate": 5.971154220607381e-06, + "loss": 0.5518, "step": 6407 }, { - "epoch": 0.67, - "grad_norm": 4.198201640329154, - "learning_rate": 2.533528318912803e-06, - "loss": 0.544, + "epoch": 0.45, + "grad_norm": 1.5313546492520331, + "learning_rate": 5.9700269129091245e-06, + "loss": 0.5522, "step": 6408 }, { - "epoch": 0.67, - "grad_norm": 2.3114583069479973, - "learning_rate": 2.532046009404537e-06, - "loss": 0.5643, + "epoch": 0.45, + "grad_norm": 1.785359937144492, + "learning_rate": 5.968899553970762e-06, + "loss": 0.4378, "step": 6409 }, { - "epoch": 0.67, - "grad_norm": 4.0785166014115655, - "learning_rate": 2.5305639866160293e-06, - "loss": 0.6458, + "epoch": 0.45, + "grad_norm": 2.383542888183812, + "learning_rate": 5.967772143851839e-06, + "loss": 0.5821, "step": 6410 }, { - "epoch": 0.67, - "grad_norm": 2.4149803287414717, - "learning_rate": 2.529082250719461e-06, - "loss": 0.7269, + "epoch": 0.45, + "grad_norm": 0.8168834323566663, + "learning_rate": 5.966644682611914e-06, + "loss": 0.4291, "step": 6411 }, { - "epoch": 0.67, - "grad_norm": 2.160446913236817, - "learning_rate": 2.5276008018869722e-06, - "loss": 0.5832, + "epoch": 0.46, + "grad_norm": 1.6599904228026834, + "learning_rate": 5.965517170310539e-06, + "loss": 0.6025, "step": 6412 }, { - "epoch": 0.67, - "grad_norm": 2.6820344561724, - "learning_rate": 2.526119640290678e-06, - "loss": 0.5942, + "epoch": 0.46, + "grad_norm": 1.701475557055519, + "learning_rate": 5.964389607007277e-06, + "loss": 0.5292, "step": 6413 }, { - "epoch": 0.67, - "grad_norm": 2.559166061869424, - "learning_rate": 2.5246387661026504e-06, - "loss": 0.6239, + "epoch": 0.46, + "grad_norm": 1.714978858169787, + "learning_rate": 5.963261992761687e-06, + "loss": 0.4997, "step": 6414 }, { - "epoch": 0.68, - "grad_norm": 2.35834972171533, - "learning_rate": 2.5231581794949356e-06, - "loss": 0.535, + "epoch": 0.46, + "grad_norm": 1.522292367449255, + "learning_rate": 5.962134327633336e-06, + "loss": 0.5099, "step": 6415 }, { - "epoch": 0.68, - "grad_norm": 2.964214903325439, - "learning_rate": 2.5216778806395448e-06, - "loss": 0.6452, + "epoch": 0.46, + "grad_norm": 1.7324391241501949, + "learning_rate": 5.96100661168179e-06, + "loss": 0.6069, "step": 6416 }, { - "epoch": 0.68, - "grad_norm": 2.200465378179132, - "learning_rate": 2.520197869708454e-06, - "loss": 0.6412, + "epoch": 0.46, + "grad_norm": 1.8151828672976762, + "learning_rate": 5.959878844966618e-06, + "loss": 0.5318, "step": 6417 }, { - "epoch": 0.68, - "grad_norm": 2.5628414197927607, - "learning_rate": 2.518718146873605e-06, - "loss": 0.6189, + "epoch": 0.46, + "grad_norm": 1.8376300738454943, + "learning_rate": 5.9587510275473924e-06, + "loss": 0.5405, "step": 6418 }, { - "epoch": 0.68, - "grad_norm": 2.3739952466153516, - "learning_rate": 2.5172387123069085e-06, - "loss": 0.5864, + "epoch": 0.46, + "grad_norm": 2.1879205383186546, + "learning_rate": 5.957623159483689e-06, + "loss": 0.5596, "step": 6419 }, { - "epoch": 0.68, - "grad_norm": 2.2088475058648496, - "learning_rate": 2.5157595661802437e-06, - "loss": 0.6126, + "epoch": 0.46, + "grad_norm": 1.6111426076499447, + "learning_rate": 5.956495240835085e-06, + "loss": 0.4993, "step": 6420 }, { - "epoch": 0.68, - "grad_norm": 2.8096193612985947, - "learning_rate": 2.514280708665449e-06, - "loss": 0.48, + "epoch": 0.46, + "grad_norm": 1.7668998612240656, + "learning_rate": 5.955367271661161e-06, + "loss": 0.5014, "step": 6421 }, { - "epoch": 0.68, - "grad_norm": 1.8758991836794494, - "learning_rate": 2.5128021399343385e-06, - "loss": 0.601, + "epoch": 0.46, + "grad_norm": 1.5036456377043879, + "learning_rate": 5.954239252021502e-06, + "loss": 0.4486, "step": 6422 }, { - "epoch": 0.68, - "grad_norm": 4.354943435805703, - "learning_rate": 2.5113238601586833e-06, - "loss": 0.5763, + "epoch": 0.46, + "grad_norm": 1.894129421554375, + "learning_rate": 5.953111181975692e-06, + "loss": 0.567, "step": 6423 }, { - "epoch": 0.68, - "grad_norm": 0.8991492339114561, - "learning_rate": 2.50984586951023e-06, - "loss": 0.571, + "epoch": 0.46, + "grad_norm": 0.772162357585521, + "learning_rate": 5.95198306158332e-06, + "loss": 0.449, "step": 6424 }, { - "epoch": 0.68, - "grad_norm": 2.988868169199592, - "learning_rate": 2.508368168160683e-06, - "loss": 0.6109, + "epoch": 0.46, + "grad_norm": 3.185759438050489, + "learning_rate": 5.950854890903975e-06, + "loss": 0.5738, "step": 6425 }, { - "epoch": 0.68, - "grad_norm": 2.0181480966879133, - "learning_rate": 2.5068907562817223e-06, - "loss": 0.6475, + "epoch": 0.46, + "grad_norm": 2.6700256299841554, + "learning_rate": 5.949726669997256e-06, + "loss": 0.5517, "step": 6426 }, { - "epoch": 0.68, - "grad_norm": 2.607017873285922, - "learning_rate": 2.505413634044984e-06, - "loss": 0.6466, + "epoch": 0.46, + "grad_norm": 1.6459719032538305, + "learning_rate": 5.948598398922754e-06, + "loss": 0.5604, "step": 6427 }, { - "epoch": 0.68, - "grad_norm": 2.5325358752326745, - "learning_rate": 2.5039368016220795e-06, - "loss": 0.707, + "epoch": 0.46, + "grad_norm": 1.777195069504668, + "learning_rate": 5.94747007774007e-06, + "loss": 0.5309, "step": 6428 }, { - "epoch": 0.68, - "grad_norm": 5.495898484741088, - "learning_rate": 2.502460259184584e-06, - "loss": 0.6143, + "epoch": 0.46, + "grad_norm": 0.7153534840062844, + "learning_rate": 5.946341706508805e-06, + "loss": 0.4129, "step": 6429 }, { - "epoch": 0.68, - "grad_norm": 2.27497515485443, - "learning_rate": 2.500984006904035e-06, - "loss": 0.6036, + "epoch": 0.46, + "grad_norm": 2.227863363642028, + "learning_rate": 5.945213285288567e-06, + "loss": 0.535, "step": 6430 }, { - "epoch": 0.68, - "grad_norm": 5.384554479372134, - "learning_rate": 2.4995080449519383e-06, - "loss": 0.632, + "epoch": 0.46, + "grad_norm": 0.7419371023920691, + "learning_rate": 5.944084814138958e-06, + "loss": 0.4627, "step": 6431 }, { - "epoch": 0.68, - "grad_norm": 2.136269004392376, - "learning_rate": 2.498032373499769e-06, - "loss": 0.5979, + "epoch": 0.46, + "grad_norm": 0.7960311295047298, + "learning_rate": 5.942956293119592e-06, + "loss": 0.4552, "step": 6432 }, { - "epoch": 0.68, - "grad_norm": 0.9591439151746047, - "learning_rate": 2.496556992718968e-06, - "loss": 0.5752, + "epoch": 0.46, + "grad_norm": 1.6028805283375644, + "learning_rate": 5.94182772229008e-06, + "loss": 0.5572, "step": 6433 }, { - "epoch": 0.68, - "grad_norm": 2.625755986560923, - "learning_rate": 2.495081902780937e-06, - "loss": 0.6251, + "epoch": 0.46, + "grad_norm": 0.7344999540880032, + "learning_rate": 5.9406991017100344e-06, + "loss": 0.4678, "step": 6434 }, { - "epoch": 0.68, - "grad_norm": 2.6993322682560046, - "learning_rate": 2.4936071038570514e-06, - "loss": 0.6101, + "epoch": 0.46, + "grad_norm": 1.8242668279025658, + "learning_rate": 5.9395704314390755e-06, + "loss": 0.6057, "step": 6435 }, { - "epoch": 0.68, - "grad_norm": 3.606064994507977, - "learning_rate": 2.4921325961186455e-06, - "loss": 0.6113, + "epoch": 0.46, + "grad_norm": 0.7261870789752807, + "learning_rate": 5.938441711536822e-06, + "loss": 0.4401, "step": 6436 }, { - "epoch": 0.68, - "grad_norm": 2.8817794595876305, - "learning_rate": 2.490658379737025e-06, - "loss": 0.6152, + "epoch": 0.46, + "grad_norm": 3.8540429723911145, + "learning_rate": 5.9373129420628994e-06, + "loss": 0.6828, "step": 6437 }, { - "epoch": 0.68, - "grad_norm": 2.592817222457228, - "learning_rate": 2.489184454883462e-06, - "loss": 0.6595, + "epoch": 0.46, + "grad_norm": 1.6125222360160034, + "learning_rate": 5.936184123076929e-06, + "loss": 0.5378, "step": 6438 }, { - "epoch": 0.68, - "grad_norm": 3.112454316014022, - "learning_rate": 2.4877108217291913e-06, - "loss": 0.6833, + "epoch": 0.46, + "grad_norm": 1.8500386336443424, + "learning_rate": 5.935055254638543e-06, + "loss": 0.5348, "step": 6439 }, { - "epoch": 0.68, - "grad_norm": 2.089441060610151, - "learning_rate": 2.4862374804454127e-06, - "loss": 0.6656, + "epoch": 0.46, + "grad_norm": 2.0237995741430703, + "learning_rate": 5.933926336807369e-06, + "loss": 0.5435, "step": 6440 }, { - "epoch": 0.68, - "grad_norm": 2.1810446892787994, - "learning_rate": 2.484764431203297e-06, - "loss": 0.655, + "epoch": 0.46, + "grad_norm": 1.8116085279090233, + "learning_rate": 5.932797369643042e-06, + "loss": 0.5345, "step": 6441 }, { - "epoch": 0.68, - "grad_norm": 2.2182929541942453, - "learning_rate": 2.483291674173981e-06, - "loss": 0.6376, + "epoch": 0.46, + "grad_norm": 1.4960293755445644, + "learning_rate": 5.931668353205196e-06, + "loss": 0.5007, "step": 6442 }, { - "epoch": 0.68, - "grad_norm": 2.620256297362581, - "learning_rate": 2.4818192095285615e-06, - "loss": 0.6077, + "epoch": 0.46, + "grad_norm": 1.7637556711405256, + "learning_rate": 5.930539287553471e-06, + "loss": 0.5747, "step": 6443 }, { - "epoch": 0.68, - "grad_norm": 0.959409963615247, - "learning_rate": 2.4803470374381084e-06, - "loss": 0.5402, + "epoch": 0.46, + "grad_norm": 1.701291157491818, + "learning_rate": 5.929410172747507e-06, + "loss": 0.6108, "step": 6444 }, { - "epoch": 0.68, - "grad_norm": 2.690295499658671, - "learning_rate": 2.4788751580736516e-06, - "loss": 0.637, + "epoch": 0.46, + "grad_norm": 1.9899878206660269, + "learning_rate": 5.92828100884695e-06, + "loss": 0.5131, "step": 6445 }, { - "epoch": 0.68, - "grad_norm": 2.2246955758634246, - "learning_rate": 2.4774035716061924e-06, - "loss": 0.6423, + "epoch": 0.46, + "grad_norm": 2.0129797001662744, + "learning_rate": 5.927151795911444e-06, + "loss": 0.5285, "step": 6446 }, { - "epoch": 0.68, - "grad_norm": 2.4753333122148784, - "learning_rate": 2.4759322782066924e-06, - "loss": 0.6151, + "epoch": 0.46, + "grad_norm": 1.5313099691712024, + "learning_rate": 5.926022534000638e-06, + "loss": 0.4676, "step": 6447 }, { - "epoch": 0.68, - "grad_norm": 2.3261687913389704, - "learning_rate": 2.4744612780460863e-06, - "loss": 0.6234, + "epoch": 0.46, + "grad_norm": 1.7621705638801388, + "learning_rate": 5.924893223174185e-06, + "loss": 0.5032, "step": 6448 }, { - "epoch": 0.68, - "grad_norm": 2.050091521356472, - "learning_rate": 2.472990571295266e-06, - "loss": 0.528, + "epoch": 0.46, + "grad_norm": 1.6754084396537015, + "learning_rate": 5.923763863491737e-06, + "loss": 0.5567, "step": 6449 }, { - "epoch": 0.68, - "grad_norm": 2.266129287841651, - "learning_rate": 2.4715201581250962e-06, - "loss": 0.6456, + "epoch": 0.46, + "grad_norm": 1.7431475508451586, + "learning_rate": 5.922634455012952e-06, + "loss": 0.5231, "step": 6450 }, { - "epoch": 0.68, - "grad_norm": 2.5040000923789902, - "learning_rate": 2.4700500387064074e-06, - "loss": 0.6231, + "epoch": 0.46, + "grad_norm": 4.223515725670235, + "learning_rate": 5.9215049977974885e-06, + "loss": 0.5907, "step": 6451 }, { - "epoch": 0.68, - "grad_norm": 2.405010632193548, - "learning_rate": 2.4685802132099923e-06, - "loss": 0.5844, + "epoch": 0.46, + "grad_norm": 1.6987386506378932, + "learning_rate": 5.920375491905009e-06, + "loss": 0.5577, "step": 6452 }, { - "epoch": 0.68, - "grad_norm": 2.6378566394399967, - "learning_rate": 2.4671106818066076e-06, - "loss": 0.6313, + "epoch": 0.46, + "grad_norm": 2.408567243086743, + "learning_rate": 5.919245937395177e-06, + "loss": 0.5847, "step": 6453 }, { - "epoch": 0.68, - "grad_norm": 2.429331136222701, - "learning_rate": 2.465641444666983e-06, - "loss": 0.6776, + "epoch": 0.46, + "grad_norm": 1.4083150243281561, + "learning_rate": 5.9181163343276615e-06, + "loss": 0.4772, "step": 6454 }, { - "epoch": 0.68, - "grad_norm": 3.3515232887180457, - "learning_rate": 2.4641725019618107e-06, - "loss": 0.5311, + "epoch": 0.46, + "grad_norm": 1.6395284349959505, + "learning_rate": 5.916986682762128e-06, + "loss": 0.5641, "step": 6455 }, { - "epoch": 0.68, - "grad_norm": 2.516974554330432, - "learning_rate": 2.4627038538617447e-06, - "loss": 0.5724, + "epoch": 0.46, + "grad_norm": 0.7530802695977581, + "learning_rate": 5.9158569827582525e-06, + "loss": 0.4602, "step": 6456 }, { - "epoch": 0.68, - "grad_norm": 2.1215303565599735, - "learning_rate": 2.461235500537412e-06, - "loss": 0.5501, + "epoch": 0.46, + "grad_norm": 1.6704504379518457, + "learning_rate": 5.9147272343757055e-06, + "loss": 0.4974, "step": 6457 }, { - "epoch": 0.68, - "grad_norm": 2.0665664683899, - "learning_rate": 2.4597674421593985e-06, - "loss": 0.6118, + "epoch": 0.46, + "grad_norm": 2.497360090310237, + "learning_rate": 5.9135974376741674e-06, + "loss": 0.613, "step": 6458 }, { - "epoch": 0.68, - "grad_norm": 2.723504300822161, - "learning_rate": 2.458299678898263e-06, - "loss": 0.6764, + "epoch": 0.46, + "grad_norm": 1.8003183315980518, + "learning_rate": 5.912467592713318e-06, + "loss": 0.56, "step": 6459 }, { - "epoch": 0.68, - "grad_norm": 2.4054502421315638, - "learning_rate": 2.456832210924521e-06, - "loss": 0.6773, + "epoch": 0.46, + "grad_norm": 1.791665458662133, + "learning_rate": 5.911337699552838e-06, + "loss": 0.5403, "step": 6460 }, { - "epoch": 0.68, - "grad_norm": 2.408884797009261, - "learning_rate": 2.455365038408663e-06, - "loss": 0.6166, + "epoch": 0.46, + "grad_norm": 1.6732740987392551, + "learning_rate": 5.910207758252412e-06, + "loss": 0.4911, "step": 6461 }, { - "epoch": 0.68, - "grad_norm": 2.446288286832739, - "learning_rate": 2.453898161521137e-06, - "loss": 0.703, + "epoch": 0.46, + "grad_norm": 1.766311472230727, + "learning_rate": 5.90907776887173e-06, + "loss": 0.5282, "step": 6462 }, { - "epoch": 0.68, - "grad_norm": 2.2957817402286795, - "learning_rate": 2.4524315804323627e-06, - "loss": 0.6984, + "epoch": 0.46, + "grad_norm": 1.69180903044053, + "learning_rate": 5.907947731470477e-06, + "loss": 0.5267, "step": 6463 }, { - "epoch": 0.68, - "grad_norm": 2.401524156010693, - "learning_rate": 2.4509652953127257e-06, - "loss": 0.6117, + "epoch": 0.46, + "grad_norm": 1.671977248328302, + "learning_rate": 5.90681764610835e-06, + "loss": 0.6076, "step": 6464 }, { - "epoch": 0.68, - "grad_norm": 3.328086050605523, - "learning_rate": 2.4494993063325716e-06, - "loss": 0.5302, + "epoch": 0.46, + "grad_norm": 2.0030861543779, + "learning_rate": 5.905687512845041e-06, + "loss": 0.5823, "step": 6465 }, { - "epoch": 0.68, - "grad_norm": 2.358380771233323, - "learning_rate": 2.4480336136622133e-06, - "loss": 0.6143, + "epoch": 0.46, + "grad_norm": 1.7783420368794396, + "learning_rate": 5.904557331740248e-06, + "loss": 0.5302, "step": 6466 }, { - "epoch": 0.68, - "grad_norm": 2.6392348923503626, - "learning_rate": 2.446568217471933e-06, - "loss": 0.5916, + "epoch": 0.46, + "grad_norm": 1.7029723094224982, + "learning_rate": 5.903427102853675e-06, + "loss": 0.555, "step": 6467 }, { - "epoch": 0.68, - "grad_norm": 2.7595861356010456, - "learning_rate": 2.445103117931978e-06, - "loss": 0.5833, + "epoch": 0.46, + "grad_norm": 1.9435310067845268, + "learning_rate": 5.902296826245019e-06, + "loss": 0.5297, "step": 6468 }, { - "epoch": 0.68, - "grad_norm": 2.1230177292364067, - "learning_rate": 2.443638315212555e-06, - "loss": 0.5654, + "epoch": 0.46, + "grad_norm": 1.8418662921400502, + "learning_rate": 5.901166501973989e-06, + "loss": 0.5449, "step": 6469 }, { - "epoch": 0.68, - "grad_norm": 2.8062776463140517, - "learning_rate": 2.442173809483845e-06, - "loss": 0.7762, + "epoch": 0.46, + "grad_norm": 1.7736040801869801, + "learning_rate": 5.9000361301002885e-06, + "loss": 0.5476, "step": 6470 }, { - "epoch": 0.68, - "grad_norm": 2.716571799479343, - "learning_rate": 2.440709600915986e-06, - "loss": 0.6991, + "epoch": 0.46, + "grad_norm": 1.9482762803233913, + "learning_rate": 5.898905710683631e-06, + "loss": 0.5692, "step": 6471 }, { - "epoch": 0.68, - "grad_norm": 2.7930103373086728, - "learning_rate": 2.4392456896790874e-06, - "loss": 0.5541, + "epoch": 0.46, + "grad_norm": 1.7334269984723598, + "learning_rate": 5.897775243783726e-06, + "loss": 0.6103, "step": 6472 }, { - "epoch": 0.68, - "grad_norm": 3.224499151376592, - "learning_rate": 2.437782075943224e-06, - "loss": 0.6459, + "epoch": 0.46, + "grad_norm": 1.5231131679306649, + "learning_rate": 5.896644729460293e-06, + "loss": 0.5421, "step": 6473 }, { - "epoch": 0.68, - "grad_norm": 3.395183867623381, - "learning_rate": 2.4363187598784323e-06, - "loss": 0.6556, + "epoch": 0.46, + "grad_norm": 2.267901586270862, + "learning_rate": 5.895514167773046e-06, + "loss": 0.5887, "step": 6474 }, { - "epoch": 0.68, - "grad_norm": 2.1451834322017946, - "learning_rate": 2.4348557416547146e-06, - "loss": 0.6845, + "epoch": 0.46, + "grad_norm": 4.95863221142584, + "learning_rate": 5.894383558781708e-06, + "loss": 0.5376, "step": 6475 }, { - "epoch": 0.68, - "grad_norm": 2.6454970175141983, - "learning_rate": 2.4333930214420414e-06, - "loss": 0.6793, + "epoch": 0.46, + "grad_norm": 0.7462560562836029, + "learning_rate": 5.893252902545999e-06, + "loss": 0.4779, "step": 6476 }, { - "epoch": 0.68, - "grad_norm": 2.1614550552826097, - "learning_rate": 2.43193059941035e-06, - "loss": 0.6409, + "epoch": 0.46, + "grad_norm": 1.5844274750047111, + "learning_rate": 5.892122199125644e-06, + "loss": 0.5039, "step": 6477 }, { - "epoch": 0.68, - "grad_norm": 2.9560127452631026, - "learning_rate": 2.4304684757295376e-06, - "loss": 0.6685, + "epoch": 0.46, + "grad_norm": 1.8888732096707277, + "learning_rate": 5.890991448580372e-06, + "loss": 0.5154, "step": 6478 }, { - "epoch": 0.68, - "grad_norm": 2.385228890681212, - "learning_rate": 2.429006650569468e-06, - "loss": 0.6054, + "epoch": 0.46, + "grad_norm": 1.8066295067264166, + "learning_rate": 5.8898606509699115e-06, + "loss": 0.6317, "step": 6479 }, { - "epoch": 0.68, - "grad_norm": 2.7023174068498594, - "learning_rate": 2.4275451240999743e-06, - "loss": 0.6734, + "epoch": 0.46, + "grad_norm": 2.0124340132045475, + "learning_rate": 5.888729806353996e-06, + "loss": 0.5682, "step": 6480 }, { - "epoch": 0.68, - "grad_norm": 2.8800257059035994, - "learning_rate": 2.4260838964908534e-06, - "loss": 0.6168, + "epoch": 0.46, + "grad_norm": 1.9485768589091188, + "learning_rate": 5.887598914792363e-06, + "loss": 0.5178, "step": 6481 }, { - "epoch": 0.68, - "grad_norm": 2.3173592174188915, - "learning_rate": 2.424622967911863e-06, - "loss": 0.6082, + "epoch": 0.46, + "grad_norm": 1.6811444904472665, + "learning_rate": 5.886467976344748e-06, + "loss": 0.5545, "step": 6482 }, { - "epoch": 0.68, - "grad_norm": 0.9950821121430008, - "learning_rate": 2.4231623385327337e-06, - "loss": 0.5374, + "epoch": 0.46, + "grad_norm": 2.9770245019025023, + "learning_rate": 5.885336991070888e-06, + "loss": 0.5176, "step": 6483 }, { - "epoch": 0.68, - "grad_norm": 2.9226988485243646, - "learning_rate": 2.421702008523153e-06, - "loss": 0.5921, + "epoch": 0.46, + "grad_norm": 0.7716739271670248, + "learning_rate": 5.88420595903053e-06, + "loss": 0.445, "step": 6484 }, { - "epoch": 0.68, - "grad_norm": 2.2019190672941673, - "learning_rate": 2.4202419780527796e-06, - "loss": 0.6282, + "epoch": 0.46, + "grad_norm": 1.9323586162619188, + "learning_rate": 5.883074880283417e-06, + "loss": 0.642, "step": 6485 }, { - "epoch": 0.68, - "grad_norm": 18.87914111722867, - "learning_rate": 2.418782247291238e-06, - "loss": 0.5984, + "epoch": 0.46, + "grad_norm": 1.8309837804236608, + "learning_rate": 5.881943754889295e-06, + "loss": 0.4937, "step": 6486 }, { - "epoch": 0.68, - "grad_norm": 0.9756191834087302, - "learning_rate": 2.4173228164081135e-06, - "loss": 0.5355, + "epoch": 0.46, + "grad_norm": 1.3210912582358003, + "learning_rate": 5.880812582907917e-06, + "loss": 0.4149, "step": 6487 }, { - "epoch": 0.68, - "grad_norm": 2.229047102096132, - "learning_rate": 2.4158636855729563e-06, - "loss": 0.6097, + "epoch": 0.46, + "grad_norm": 2.5590120695836163, + "learning_rate": 5.879681364399033e-06, + "loss": 0.5514, "step": 6488 }, { - "epoch": 0.68, - "grad_norm": 2.160615604668489, - "learning_rate": 2.414404854955286e-06, - "loss": 0.5794, + "epoch": 0.46, + "grad_norm": 1.473701090451251, + "learning_rate": 5.8785500994223995e-06, + "loss": 0.5332, "step": 6489 }, { - "epoch": 0.68, - "grad_norm": 2.5744631310901793, - "learning_rate": 2.4129463247245877e-06, - "loss": 0.6365, + "epoch": 0.46, + "grad_norm": 1.6882501633378515, + "learning_rate": 5.877418788037773e-06, + "loss": 0.6006, "step": 6490 }, { - "epoch": 0.68, - "grad_norm": 2.4664632502489976, - "learning_rate": 2.411488095050305e-06, - "loss": 0.6087, + "epoch": 0.46, + "grad_norm": 1.8244230888729185, + "learning_rate": 5.876287430304912e-06, + "loss": 0.5631, "step": 6491 }, { - "epoch": 0.68, - "grad_norm": 2.2462459640267336, - "learning_rate": 2.410030166101855e-06, - "loss": 0.6314, + "epoch": 0.46, + "grad_norm": 1.9167636052587769, + "learning_rate": 5.87515602628358e-06, + "loss": 0.5776, "step": 6492 }, { - "epoch": 0.68, - "grad_norm": 3.7690797181208953, - "learning_rate": 2.4085725380486106e-06, - "loss": 0.6211, + "epoch": 0.46, + "grad_norm": 1.6900279828057316, + "learning_rate": 5.874024576033541e-06, + "loss": 0.5654, "step": 6493 }, { - "epoch": 0.68, - "grad_norm": 2.3187693057872476, - "learning_rate": 2.4071152110599204e-06, - "loss": 0.5885, + "epoch": 0.46, + "grad_norm": 1.813799726233723, + "learning_rate": 5.872893079614563e-06, + "loss": 0.5371, "step": 6494 }, { - "epoch": 0.68, - "grad_norm": 2.4839016053608605, - "learning_rate": 2.4056581853050877e-06, - "loss": 0.6435, + "epoch": 0.46, + "grad_norm": 1.802170127111877, + "learning_rate": 5.871761537086413e-06, + "loss": 0.5439, "step": 6495 }, { - "epoch": 0.68, - "grad_norm": 3.5230696548262035, - "learning_rate": 2.4042014609533894e-06, - "loss": 0.5636, + "epoch": 0.46, + "grad_norm": 2.123946053483945, + "learning_rate": 5.870629948508867e-06, + "loss": 0.5959, "step": 6496 }, { - "epoch": 0.68, - "grad_norm": 2.54975560910205, - "learning_rate": 2.4027450381740598e-06, - "loss": 0.6318, + "epoch": 0.46, + "grad_norm": 1.4733286324789803, + "learning_rate": 5.869498313941696e-06, + "loss": 0.4765, "step": 6497 }, { - "epoch": 0.68, - "grad_norm": 2.1179176495059173, - "learning_rate": 2.4012889171363034e-06, - "loss": 0.6436, + "epoch": 0.46, + "grad_norm": 1.655261132677402, + "learning_rate": 5.868366633444678e-06, + "loss": 0.5383, "step": 6498 }, { - "epoch": 0.68, - "grad_norm": 2.219085902942063, - "learning_rate": 2.3998330980092906e-06, - "loss": 0.5653, + "epoch": 0.46, + "grad_norm": 1.504868092426369, + "learning_rate": 5.8672349070775925e-06, + "loss": 0.5524, "step": 6499 }, { - "epoch": 0.68, - "grad_norm": 2.4057964800651495, - "learning_rate": 2.3983775809621525e-06, - "loss": 0.6256, + "epoch": 0.46, + "grad_norm": 1.3537507523156453, + "learning_rate": 5.866103134900219e-06, + "loss": 0.4753, "step": 6500 }, { - "epoch": 0.68, - "grad_norm": 2.9344149719041113, - "learning_rate": 2.3969223661639838e-06, - "loss": 0.6745, + "epoch": 0.46, + "grad_norm": 1.7070599027335374, + "learning_rate": 5.864971316972344e-06, + "loss": 0.5086, "step": 6501 }, { - "epoch": 0.68, - "grad_norm": 3.7761690986099326, - "learning_rate": 2.395467453783851e-06, - "loss": 0.5358, + "epoch": 0.46, + "grad_norm": 1.577050630505989, + "learning_rate": 5.863839453353753e-06, + "loss": 0.527, "step": 6502 }, { - "epoch": 0.68, - "grad_norm": 2.3942022138130055, - "learning_rate": 2.394012843990781e-06, - "loss": 0.664, + "epoch": 0.46, + "grad_norm": 1.5888107007520351, + "learning_rate": 5.862707544104236e-06, + "loss": 0.4901, "step": 6503 }, { - "epoch": 0.68, - "grad_norm": 2.3279945476576263, - "learning_rate": 2.3925585369537647e-06, - "loss": 0.6307, + "epoch": 0.46, + "grad_norm": 1.5101230748555103, + "learning_rate": 5.861575589283583e-06, + "loss": 0.4781, "step": 6504 }, { - "epoch": 0.68, - "grad_norm": 4.007225583568133, - "learning_rate": 2.391104532841762e-06, - "loss": 0.6179, + "epoch": 0.46, + "grad_norm": 1.6850334093785295, + "learning_rate": 5.860443588951587e-06, + "loss": 0.5289, "step": 6505 }, { - "epoch": 0.68, - "grad_norm": 3.6218819183172104, - "learning_rate": 2.389650831823691e-06, - "loss": 0.6501, + "epoch": 0.46, + "grad_norm": 1.5225714063852112, + "learning_rate": 5.8593115431680446e-06, + "loss": 0.5485, "step": 6506 }, { - "epoch": 0.68, - "grad_norm": 2.3080723378936723, - "learning_rate": 2.388197434068441e-06, - "loss": 0.6166, + "epoch": 0.46, + "grad_norm": 1.468185126349229, + "learning_rate": 5.858179451992757e-06, + "loss": 0.5116, "step": 6507 }, { - "epoch": 0.68, - "grad_norm": 2.2881275417992613, - "learning_rate": 2.3867443397448646e-06, - "loss": 0.622, + "epoch": 0.46, + "grad_norm": 1.9167290260953755, + "learning_rate": 5.857047315485521e-06, + "loss": 0.6069, "step": 6508 }, { - "epoch": 0.68, - "grad_norm": 2.241683983187079, - "learning_rate": 2.3852915490217772e-06, - "loss": 0.6207, + "epoch": 0.46, + "grad_norm": 1.610856044333189, + "learning_rate": 5.855915133706142e-06, + "loss": 0.5687, "step": 6509 }, { - "epoch": 0.69, - "grad_norm": 2.556095245479891, - "learning_rate": 2.383839062067957e-06, - "loss": 0.5998, + "epoch": 0.46, + "grad_norm": 2.569083219951473, + "learning_rate": 5.854782906714425e-06, + "loss": 0.5437, "step": 6510 }, { - "epoch": 0.69, - "grad_norm": 1.9799784118007415, - "learning_rate": 2.382386879052152e-06, - "loss": 0.6074, + "epoch": 0.46, + "grad_norm": 4.975678275415909, + "learning_rate": 5.853650634570178e-06, + "loss": 0.47, "step": 6511 }, { - "epoch": 0.69, - "grad_norm": 2.3182312355329513, - "learning_rate": 2.3809350001430743e-06, - "loss": 0.5951, + "epoch": 0.46, + "grad_norm": 1.550531104282591, + "learning_rate": 5.852518317333215e-06, + "loss": 0.5513, "step": 6512 }, { - "epoch": 0.69, - "grad_norm": 2.3694748756782027, - "learning_rate": 2.3794834255093977e-06, - "loss": 0.6304, + "epoch": 0.46, + "grad_norm": 1.5076593158282061, + "learning_rate": 5.851385955063342e-06, + "loss": 0.5174, "step": 6513 }, { - "epoch": 0.69, - "grad_norm": 2.2001457523740147, - "learning_rate": 2.37803215531976e-06, - "loss": 0.6331, + "epoch": 0.46, + "grad_norm": 2.4261263481099307, + "learning_rate": 5.850253547820382e-06, + "loss": 0.5786, "step": 6514 }, { - "epoch": 0.69, - "grad_norm": 2.3212863403370796, - "learning_rate": 2.3765811897427667e-06, - "loss": 0.53, + "epoch": 0.46, + "grad_norm": 2.9391863591740357, + "learning_rate": 5.849121095664145e-06, + "loss": 0.5039, "step": 6515 }, { - "epoch": 0.69, - "grad_norm": 3.160468095861783, - "learning_rate": 2.375130528946989e-06, - "loss": 0.7405, + "epoch": 0.46, + "grad_norm": 1.5052218345379327, + "learning_rate": 5.847988598654455e-06, + "loss": 0.5338, "step": 6516 }, { - "epoch": 0.69, - "grad_norm": 2.6855144317046697, - "learning_rate": 2.373680173100957e-06, - "loss": 0.5849, + "epoch": 0.46, + "grad_norm": 5.27682586507779, + "learning_rate": 5.846856056851135e-06, + "loss": 0.5834, "step": 6517 }, { - "epoch": 0.69, - "grad_norm": 2.341892568924303, - "learning_rate": 2.3722301223731724e-06, - "loss": 0.5603, + "epoch": 0.46, + "grad_norm": 1.704266513912345, + "learning_rate": 5.845723470314008e-06, + "loss": 0.5007, "step": 6518 }, { - "epoch": 0.69, - "grad_norm": 2.283085350966203, - "learning_rate": 2.3707803769320943e-06, - "loss": 0.6285, + "epoch": 0.46, + "grad_norm": 1.6868093071475736, + "learning_rate": 5.844590839102901e-06, + "loss": 0.5565, "step": 6519 }, { - "epoch": 0.69, - "grad_norm": 3.545450875140134, - "learning_rate": 2.3693309369461514e-06, - "loss": 0.643, + "epoch": 0.46, + "grad_norm": 1.5717427382718754, + "learning_rate": 5.843458163277646e-06, + "loss": 0.5491, "step": 6520 }, { - "epoch": 0.69, - "grad_norm": 2.34738226126757, - "learning_rate": 2.367881802583738e-06, - "loss": 0.6825, + "epoch": 0.46, + "grad_norm": 1.814481798794096, + "learning_rate": 5.8423254428980715e-06, + "loss": 0.598, "step": 6521 }, { - "epoch": 0.69, - "grad_norm": 2.6575846961053333, - "learning_rate": 2.366432974013208e-06, - "loss": 0.5756, + "epoch": 0.46, + "grad_norm": 1.6822941494722286, + "learning_rate": 5.841192678024013e-06, + "loss": 0.5239, "step": 6522 }, { - "epoch": 0.69, - "grad_norm": 2.6040764810860146, - "learning_rate": 2.36498445140288e-06, - "loss": 0.6637, + "epoch": 0.46, + "grad_norm": 1.7091621356880293, + "learning_rate": 5.8400598687153065e-06, + "loss": 0.5466, "step": 6523 }, { - "epoch": 0.69, - "grad_norm": 2.575034774458675, - "learning_rate": 2.3635362349210423e-06, - "loss": 0.6506, + "epoch": 0.46, + "grad_norm": 1.8703470191990452, + "learning_rate": 5.838927015031792e-06, + "loss": 0.532, "step": 6524 }, { - "epoch": 0.69, - "grad_norm": 2.561154528891339, - "learning_rate": 2.362088324735945e-06, - "loss": 0.585, + "epoch": 0.46, + "grad_norm": 1.9000594990125457, + "learning_rate": 5.837794117033309e-06, + "loss": 0.5833, "step": 6525 }, { - "epoch": 0.69, - "grad_norm": 2.557511457700825, - "learning_rate": 2.3606407210158007e-06, - "loss": 0.7341, + "epoch": 0.46, + "grad_norm": 1.6636464991010342, + "learning_rate": 5.836661174779703e-06, + "loss": 0.4796, "step": 6526 }, { - "epoch": 0.69, - "grad_norm": 5.151485466378373, - "learning_rate": 2.3591934239287858e-06, - "loss": 0.5364, + "epoch": 0.46, + "grad_norm": 1.8181075623051646, + "learning_rate": 5.835528188330818e-06, + "loss": 0.6016, "step": 6527 }, { - "epoch": 0.69, - "grad_norm": 3.2369051385886, - "learning_rate": 2.3577464336430446e-06, - "loss": 0.5904, + "epoch": 0.46, + "grad_norm": 1.7286660746278752, + "learning_rate": 5.834395157746502e-06, + "loss": 0.4815, "step": 6528 }, { - "epoch": 0.69, - "grad_norm": 2.4237630485347683, - "learning_rate": 2.356299750326687e-06, - "loss": 0.6413, + "epoch": 0.46, + "grad_norm": 2.6311749685805332, + "learning_rate": 5.833262083086609e-06, + "loss": 0.5701, "step": 6529 }, { - "epoch": 0.69, - "grad_norm": 2.791157720967069, - "learning_rate": 2.3548533741477807e-06, - "loss": 0.6735, + "epoch": 0.46, + "grad_norm": 1.6225147668060855, + "learning_rate": 5.832128964410987e-06, + "loss": 0.5573, "step": 6530 }, { - "epoch": 0.69, - "grad_norm": 2.3572226180240285, - "learning_rate": 2.353407305274365e-06, - "loss": 0.6745, + "epoch": 0.46, + "grad_norm": 1.6162450168338347, + "learning_rate": 5.830995801779494e-06, + "loss": 0.5186, "step": 6531 }, { - "epoch": 0.69, - "grad_norm": 2.756415728746884, - "learning_rate": 2.3519615438744358e-06, - "loss": 0.6098, + "epoch": 0.46, + "grad_norm": 1.8084536918066667, + "learning_rate": 5.829862595251987e-06, + "loss": 0.556, "step": 6532 }, { - "epoch": 0.69, - "grad_norm": 3.9211036631117326, - "learning_rate": 2.3505160901159596e-06, - "loss": 0.6391, + "epoch": 0.46, + "grad_norm": 1.4588511693307329, + "learning_rate": 5.828729344888326e-06, + "loss": 0.4689, "step": 6533 }, { - "epoch": 0.69, - "grad_norm": 10.804798236915875, - "learning_rate": 2.3490709441668673e-06, - "loss": 0.6013, + "epoch": 0.46, + "grad_norm": 2.297747658567431, + "learning_rate": 5.827596050748373e-06, + "loss": 0.5161, "step": 6534 }, { - "epoch": 0.69, - "grad_norm": 2.3231398228966333, - "learning_rate": 2.34762610619505e-06, - "loss": 0.6146, + "epoch": 0.46, + "grad_norm": 1.6381720660000378, + "learning_rate": 5.826462712891993e-06, + "loss": 0.4965, "step": 6535 }, { - "epoch": 0.69, - "grad_norm": 2.496092282256762, - "learning_rate": 2.346181576368362e-06, - "loss": 0.7055, + "epoch": 0.46, + "grad_norm": 1.7686671079376857, + "learning_rate": 5.82532933137905e-06, + "loss": 0.5698, "step": 6536 }, { - "epoch": 0.69, - "grad_norm": 2.487885892384019, - "learning_rate": 2.344737354854627e-06, - "loss": 0.7101, + "epoch": 0.46, + "grad_norm": 1.6371246449748926, + "learning_rate": 5.824195906269418e-06, + "loss": 0.5254, "step": 6537 }, { - "epoch": 0.69, - "grad_norm": 3.2049421861180685, - "learning_rate": 2.343293441821633e-06, - "loss": 0.672, + "epoch": 0.46, + "grad_norm": 1.5937466057559901, + "learning_rate": 5.823062437622962e-06, + "loss": 0.523, "step": 6538 }, { - "epoch": 0.69, - "grad_norm": 1.9953645112584768, - "learning_rate": 2.3418498374371266e-06, - "loss": 0.6358, + "epoch": 0.46, + "grad_norm": 1.6819719970143758, + "learning_rate": 5.821928925499561e-06, + "loss": 0.6332, "step": 6539 }, { - "epoch": 0.69, - "grad_norm": 2.7737257283166983, - "learning_rate": 2.3404065418688203e-06, - "loss": 0.6623, + "epoch": 0.46, + "grad_norm": 1.7938242791162198, + "learning_rate": 5.820795369959089e-06, + "loss": 0.5317, "step": 6540 }, { - "epoch": 0.69, - "grad_norm": 2.2828290607769466, - "learning_rate": 2.3389635552843943e-06, - "loss": 0.6073, + "epoch": 0.46, + "grad_norm": 1.8836589751167838, + "learning_rate": 5.819661771061426e-06, + "loss": 0.5485, "step": 6541 }, { - "epoch": 0.69, - "grad_norm": 2.7921110029438876, - "learning_rate": 2.3375208778514903e-06, - "loss": 0.6668, + "epoch": 0.46, + "grad_norm": 0.779515597515293, + "learning_rate": 5.8185281288664485e-06, + "loss": 0.4339, "step": 6542 }, { - "epoch": 0.69, - "grad_norm": 6.963063546111307, - "learning_rate": 2.336078509737715e-06, - "loss": 0.7043, + "epoch": 0.46, + "grad_norm": 1.6693759194105433, + "learning_rate": 5.817394443434042e-06, + "loss": 0.5793, "step": 6543 }, { - "epoch": 0.69, - "grad_norm": 1.9709336196500105, - "learning_rate": 2.334636451110639e-06, - "loss": 0.5716, + "epoch": 0.46, + "grad_norm": 1.6917231571672975, + "learning_rate": 5.816260714824092e-06, + "loss": 0.5468, "step": 6544 }, { - "epoch": 0.69, - "grad_norm": 2.052297383518725, - "learning_rate": 2.333194702137793e-06, - "loss": 0.6293, + "epoch": 0.46, + "grad_norm": 1.6269392933831908, + "learning_rate": 5.815126943096485e-06, + "loss": 0.5336, "step": 6545 }, { - "epoch": 0.69, - "grad_norm": 2.2880945024427444, - "learning_rate": 2.331753262986678e-06, - "loss": 0.5453, + "epoch": 0.46, + "grad_norm": 1.9857225426171916, + "learning_rate": 5.81399312831111e-06, + "loss": 0.5888, "step": 6546 }, { - "epoch": 0.69, - "grad_norm": 3.578393672808325, - "learning_rate": 2.330312133824757e-06, - "loss": 0.7206, + "epoch": 0.46, + "grad_norm": 1.629520390041545, + "learning_rate": 5.8128592705278605e-06, + "loss": 0.5203, "step": 6547 }, { - "epoch": 0.69, - "grad_norm": 9.010515317246663, - "learning_rate": 2.3288713148194554e-06, - "loss": 0.6199, + "epoch": 0.46, + "grad_norm": 1.4673899924905942, + "learning_rate": 5.811725369806631e-06, + "loss": 0.4293, "step": 6548 }, { - "epoch": 0.69, - "grad_norm": 1.9927985217423352, - "learning_rate": 2.3274308061381605e-06, - "loss": 0.6139, + "epoch": 0.46, + "grad_norm": 0.8356123724223296, + "learning_rate": 5.810591426207315e-06, + "loss": 0.467, "step": 6549 }, { - "epoch": 0.69, - "grad_norm": 1.9806575010532403, - "learning_rate": 2.32599060794823e-06, - "loss": 0.6179, + "epoch": 0.46, + "grad_norm": 1.5460995032312055, + "learning_rate": 5.809457439789815e-06, + "loss": 0.5419, "step": 6550 }, { - "epoch": 0.69, - "grad_norm": 2.541089321186657, - "learning_rate": 2.324550720416982e-06, - "loss": 0.5371, + "epoch": 0.46, + "grad_norm": 1.7535564319300292, + "learning_rate": 5.808323410614029e-06, + "loss": 0.5144, "step": 6551 }, { - "epoch": 0.69, - "grad_norm": 2.060488905830918, - "learning_rate": 2.3231111437116954e-06, - "loss": 0.6226, + "epoch": 0.46, + "grad_norm": 2.2661333645891237, + "learning_rate": 5.807189338739861e-06, + "loss": 0.5452, "step": 6552 }, { - "epoch": 0.69, - "grad_norm": 2.944439269515085, - "learning_rate": 2.3216718779996205e-06, - "loss": 0.6055, + "epoch": 0.47, + "grad_norm": 2.006246899189775, + "learning_rate": 5.806055224227219e-06, + "loss": 0.5338, "step": 6553 }, { - "epoch": 0.69, - "grad_norm": 2.8098524227724737, - "learning_rate": 2.320232923447962e-06, - "loss": 0.6007, + "epoch": 0.47, + "grad_norm": 1.5713527799346696, + "learning_rate": 5.804921067136007e-06, + "loss": 0.6281, "step": 6554 }, { - "epoch": 0.69, - "grad_norm": 3.017792625313086, - "learning_rate": 2.318794280223897e-06, - "loss": 0.6693, + "epoch": 0.47, + "grad_norm": 1.6159235048794975, + "learning_rate": 5.803786867526138e-06, + "loss": 0.5875, "step": 6555 }, { - "epoch": 0.69, - "grad_norm": 2.2085939033581856, - "learning_rate": 2.317355948494563e-06, - "loss": 0.5985, + "epoch": 0.47, + "grad_norm": 1.9771249425213298, + "learning_rate": 5.802652625457522e-06, + "loss": 0.5601, "step": 6556 }, { - "epoch": 0.69, - "grad_norm": 2.4413921495254867, - "learning_rate": 2.31591792842706e-06, - "loss": 0.6141, + "epoch": 0.47, + "grad_norm": 2.1691213570847827, + "learning_rate": 5.801518340990075e-06, + "loss": 0.5193, "step": 6557 }, { - "epoch": 0.69, - "grad_norm": 2.270526009720749, - "learning_rate": 2.314480220188452e-06, - "loss": 0.6078, + "epoch": 0.47, + "grad_norm": 1.7639881774895407, + "learning_rate": 5.800384014183714e-06, + "loss": 0.5412, "step": 6558 }, { - "epoch": 0.69, - "grad_norm": 2.3574965924340434, - "learning_rate": 2.3130428239457688e-06, - "loss": 0.6009, + "epoch": 0.47, + "grad_norm": 1.9977348245124626, + "learning_rate": 5.799249645098357e-06, + "loss": 0.5909, "step": 6559 }, { - "epoch": 0.69, - "grad_norm": 7.673316161467211, - "learning_rate": 2.3116057398660046e-06, - "loss": 0.5492, + "epoch": 0.47, + "grad_norm": 1.6028082111757918, + "learning_rate": 5.798115233793924e-06, + "loss": 0.6064, "step": 6560 }, { - "epoch": 0.69, - "grad_norm": 2.7207174172029065, - "learning_rate": 2.3101689681161142e-06, - "loss": 0.6563, + "epoch": 0.47, + "grad_norm": 1.7423828058376563, + "learning_rate": 5.796980780330341e-06, + "loss": 0.5602, "step": 6561 }, { - "epoch": 0.69, - "grad_norm": 2.307943954138673, - "learning_rate": 2.308732508863016e-06, - "loss": 0.5498, + "epoch": 0.47, + "grad_norm": 1.7200283719111704, + "learning_rate": 5.795846284767532e-06, + "loss": 0.5749, "step": 6562 }, { - "epoch": 0.69, - "grad_norm": 4.299414059442358, - "learning_rate": 2.307296362273595e-06, - "loss": 0.6527, + "epoch": 0.47, + "grad_norm": 2.4851109977189454, + "learning_rate": 5.7947117471654265e-06, + "loss": 0.5202, "step": 6563 }, { - "epoch": 0.69, - "grad_norm": 3.0267194850070918, - "learning_rate": 2.305860528514701e-06, - "loss": 0.6284, + "epoch": 0.47, + "grad_norm": 1.6845304299926007, + "learning_rate": 5.793577167583954e-06, + "loss": 0.4929, "step": 6564 }, { - "epoch": 0.69, - "grad_norm": 2.6543376447272022, - "learning_rate": 2.304425007753141e-06, - "loss": 0.6456, + "epoch": 0.47, + "grad_norm": 0.7607268729754489, + "learning_rate": 5.792442546083047e-06, + "loss": 0.4464, "step": 6565 }, { - "epoch": 0.69, - "grad_norm": 2.2467729720442384, - "learning_rate": 2.3029898001556928e-06, - "loss": 0.5627, + "epoch": 0.47, + "grad_norm": 1.4754679864135323, + "learning_rate": 5.791307882722638e-06, + "loss": 0.5132, "step": 6566 }, { - "epoch": 0.69, - "grad_norm": 4.086285543773585, - "learning_rate": 2.301554905889092e-06, - "loss": 0.5767, + "epoch": 0.47, + "grad_norm": 1.9879661466868437, + "learning_rate": 5.790173177562666e-06, + "loss": 0.5596, "step": 6567 }, { - "epoch": 0.69, - "grad_norm": 3.212741108195205, - "learning_rate": 2.3001203251200417e-06, - "loss": 0.5448, + "epoch": 0.47, + "grad_norm": 1.7965359323545496, + "learning_rate": 5.789038430663067e-06, + "loss": 0.516, "step": 6568 }, { - "epoch": 0.69, - "grad_norm": 3.394587869638318, - "learning_rate": 2.2986860580152095e-06, - "loss": 0.6423, + "epoch": 0.47, + "grad_norm": 1.6295512429957864, + "learning_rate": 5.787903642083789e-06, + "loss": 0.5197, "step": 6569 }, { - "epoch": 0.69, - "grad_norm": 3.4031910872820763, - "learning_rate": 2.2972521047412223e-06, - "loss": 0.6362, + "epoch": 0.47, + "grad_norm": 2.100171580133209, + "learning_rate": 5.786768811884767e-06, + "loss": 0.5673, "step": 6570 }, { - "epoch": 0.69, - "grad_norm": 4.483025314036182, - "learning_rate": 2.2958184654646705e-06, - "loss": 0.4954, + "epoch": 0.47, + "grad_norm": 1.8698466540805052, + "learning_rate": 5.785633940125953e-06, + "loss": 0.5722, "step": 6571 }, { - "epoch": 0.69, - "grad_norm": 2.548631805057781, - "learning_rate": 2.2943851403521123e-06, - "loss": 0.7172, + "epoch": 0.47, + "grad_norm": 3.3053257114793593, + "learning_rate": 5.7844990268672905e-06, + "loss": 0.5337, "step": 6572 }, { - "epoch": 0.69, - "grad_norm": 4.61360591907676, - "learning_rate": 2.2929521295700695e-06, - "loss": 0.6463, + "epoch": 0.47, + "grad_norm": 2.4963701055644405, + "learning_rate": 5.783364072168732e-06, + "loss": 0.6052, "step": 6573 }, { - "epoch": 0.69, - "grad_norm": 2.8748275398323564, - "learning_rate": 2.2915194332850233e-06, - "loss": 0.6434, + "epoch": 0.47, + "grad_norm": 1.5596048107626008, + "learning_rate": 5.782229076090229e-06, + "loss": 0.5655, "step": 6574 }, { - "epoch": 0.69, - "grad_norm": 2.9897551268506732, - "learning_rate": 2.290087051663418e-06, - "loss": 0.5195, + "epoch": 0.47, + "grad_norm": 1.8048397410522108, + "learning_rate": 5.781094038691735e-06, + "loss": 0.4942, "step": 6575 }, { - "epoch": 0.69, - "grad_norm": 2.3753623260621426, - "learning_rate": 2.288654984871665e-06, - "loss": 0.485, + "epoch": 0.47, + "grad_norm": 1.9452005700958668, + "learning_rate": 5.779958960033206e-06, + "loss": 0.5232, "step": 6576 }, { - "epoch": 0.69, - "grad_norm": 2.635343007038432, - "learning_rate": 2.2872232330761383e-06, - "loss": 0.6737, + "epoch": 0.47, + "grad_norm": 2.066144954196438, + "learning_rate": 5.778823840174604e-06, + "loss": 0.5086, "step": 6577 }, { - "epoch": 0.69, - "grad_norm": 2.5493524141073918, - "learning_rate": 2.285791796443176e-06, - "loss": 0.6043, + "epoch": 0.47, + "grad_norm": 2.1290465068300346, + "learning_rate": 5.777688679175887e-06, + "loss": 0.5219, "step": 6578 }, { - "epoch": 0.69, - "grad_norm": 2.9622930982127853, - "learning_rate": 2.284360675139078e-06, - "loss": 0.6278, + "epoch": 0.47, + "grad_norm": 1.6174604035963174, + "learning_rate": 5.776553477097019e-06, + "loss": 0.5501, "step": 6579 }, { - "epoch": 0.69, - "grad_norm": 4.097813348559686, - "learning_rate": 2.282929869330104e-06, - "loss": 0.6418, + "epoch": 0.47, + "grad_norm": 2.265118118215325, + "learning_rate": 5.775418233997965e-06, + "loss": 0.5377, "step": 6580 }, { - "epoch": 0.69, - "grad_norm": 2.6376842210758853, - "learning_rate": 2.2814993791824836e-06, - "loss": 0.6048, + "epoch": 0.47, + "grad_norm": 0.7921459288201813, + "learning_rate": 5.774282949938692e-06, + "loss": 0.4668, "step": 6581 }, { - "epoch": 0.69, - "grad_norm": 3.058425958808465, - "learning_rate": 2.2800692048624092e-06, - "loss": 0.6329, + "epoch": 0.47, + "grad_norm": 1.7498145047295934, + "learning_rate": 5.77314762497917e-06, + "loss": 0.4956, "step": 6582 }, { - "epoch": 0.69, - "grad_norm": 0.9682730563914436, - "learning_rate": 2.278639346536031e-06, - "loss": 0.5591, + "epoch": 0.47, + "grad_norm": 1.7969535894835564, + "learning_rate": 5.772012259179371e-06, + "loss": 0.5483, "step": 6583 }, { - "epoch": 0.69, - "grad_norm": 3.6023672656105106, - "learning_rate": 2.2772098043694656e-06, - "loss": 0.6068, + "epoch": 0.47, + "grad_norm": 0.7415886293354063, + "learning_rate": 5.770876852599268e-06, + "loss": 0.4276, "step": 6584 }, { - "epoch": 0.69, - "grad_norm": 2.5349956031101852, - "learning_rate": 2.2757805785287946e-06, - "loss": 0.5998, + "epoch": 0.47, + "grad_norm": 2.004676155991473, + "learning_rate": 5.769741405298838e-06, + "loss": 0.5775, "step": 6585 }, { - "epoch": 0.69, - "grad_norm": 1.9259914387049755, - "learning_rate": 2.274351669180063e-06, - "loss": 0.5596, + "epoch": 0.47, + "grad_norm": 1.7890772872768979, + "learning_rate": 5.76860591733806e-06, + "loss": 0.5407, "step": 6586 }, { - "epoch": 0.69, - "grad_norm": 2.1838133733921383, - "learning_rate": 2.272923076489275e-06, - "loss": 0.6713, + "epoch": 0.47, + "grad_norm": 1.869542100299995, + "learning_rate": 5.7674703887769105e-06, + "loss": 0.553, "step": 6587 }, { - "epoch": 0.69, - "grad_norm": 2.354737589610576, - "learning_rate": 2.271494800622399e-06, - "loss": 0.5937, + "epoch": 0.47, + "grad_norm": 1.6012212973383677, + "learning_rate": 5.766334819675376e-06, + "loss": 0.5932, "step": 6588 }, { - "epoch": 0.69, - "grad_norm": 3.3360105669346956, - "learning_rate": 2.2700668417453703e-06, - "loss": 0.6004, + "epoch": 0.47, + "grad_norm": 9.575654640004853, + "learning_rate": 5.765199210093439e-06, + "loss": 0.5435, "step": 6589 }, { - "epoch": 0.69, - "grad_norm": 2.521181364398551, - "learning_rate": 2.2686392000240838e-06, - "loss": 0.6411, + "epoch": 0.47, + "grad_norm": 1.66334889412972, + "learning_rate": 5.764063560091087e-06, + "loss": 0.5184, "step": 6590 }, { - "epoch": 0.69, - "grad_norm": 2.3567654473778523, - "learning_rate": 2.2672118756244014e-06, - "loss": 0.6683, + "epoch": 0.47, + "grad_norm": 0.8187527659932782, + "learning_rate": 5.762927869728308e-06, + "loss": 0.4767, "step": 6591 }, { - "epoch": 0.69, - "grad_norm": 2.2629509475772647, - "learning_rate": 2.2657848687121444e-06, - "loss": 0.5745, + "epoch": 0.47, + "grad_norm": 2.0357744904535315, + "learning_rate": 5.7617921390650945e-06, + "loss": 0.53, "step": 6592 }, { - "epoch": 0.69, - "grad_norm": 2.1762686821367487, - "learning_rate": 2.2643581794530943e-06, - "loss": 0.6356, + "epoch": 0.47, + "grad_norm": 1.9753597136221202, + "learning_rate": 5.76065636816144e-06, + "loss": 0.5282, "step": 6593 }, { - "epoch": 0.69, - "grad_norm": 2.4123508723145535, - "learning_rate": 2.2629318080130042e-06, - "loss": 0.638, + "epoch": 0.47, + "grad_norm": 2.3260688541496064, + "learning_rate": 5.759520557077337e-06, + "loss": 0.5609, "step": 6594 }, { - "epoch": 0.69, - "grad_norm": 8.803556073787599, - "learning_rate": 2.261505754557586e-06, - "loss": 0.6717, + "epoch": 0.47, + "grad_norm": 1.494799226710269, + "learning_rate": 5.758384705872786e-06, + "loss": 0.497, "step": 6595 }, { - "epoch": 0.69, - "grad_norm": 2.8959725718418525, - "learning_rate": 2.260080019252513e-06, - "loss": 0.5497, + "epoch": 0.47, + "grad_norm": 1.5495709125023163, + "learning_rate": 5.757248814607784e-06, + "loss": 0.5311, "step": 6596 }, { - "epoch": 0.69, - "grad_norm": 2.3343782073258676, - "learning_rate": 2.258654602263421e-06, - "loss": 0.6782, + "epoch": 0.47, + "grad_norm": 1.5708119918853996, + "learning_rate": 5.756112883342334e-06, + "loss": 0.5853, "step": 6597 }, { - "epoch": 0.69, - "grad_norm": 2.0752300588139856, - "learning_rate": 2.2572295037559135e-06, - "loss": 0.6159, + "epoch": 0.47, + "grad_norm": 2.2078653977089955, + "learning_rate": 5.754976912136439e-06, + "loss": 0.5919, "step": 6598 }, { - "epoch": 0.69, - "grad_norm": 2.4484059409198364, - "learning_rate": 2.2558047238955547e-06, - "loss": 0.6265, + "epoch": 0.47, + "grad_norm": 1.5936759250261905, + "learning_rate": 5.753840901050107e-06, + "loss": 0.4539, "step": 6599 }, { - "epoch": 0.69, - "grad_norm": 3.0167746887631335, - "learning_rate": 2.2543802628478695e-06, - "loss": 0.6153, + "epoch": 0.47, + "grad_norm": 1.7786963315152928, + "learning_rate": 5.752704850143342e-06, + "loss": 0.533, "step": 6600 }, { - "epoch": 0.69, - "grad_norm": 3.7309284321227167, - "learning_rate": 2.2529561207783495e-06, - "loss": 0.5535, + "epoch": 0.47, + "grad_norm": 1.5168067489811323, + "learning_rate": 5.7515687594761596e-06, + "loss": 0.4708, "step": 6601 }, { - "epoch": 0.69, - "grad_norm": 2.5843645635460657, - "learning_rate": 2.251532297852445e-06, - "loss": 0.5995, + "epoch": 0.47, + "grad_norm": 1.5622053000885727, + "learning_rate": 5.750432629108566e-06, + "loss": 0.5198, "step": 6602 }, { - "epoch": 0.69, - "grad_norm": 2.1738162447033127, - "learning_rate": 2.2501087942355736e-06, - "loss": 0.7412, + "epoch": 0.47, + "grad_norm": 1.745616218048588, + "learning_rate": 5.749296459100579e-06, + "loss": 0.5908, "step": 6603 }, { - "epoch": 0.69, - "grad_norm": 2.4229284055465707, - "learning_rate": 2.2486856100931146e-06, - "loss": 0.6312, + "epoch": 0.47, + "grad_norm": 0.8534249230972306, + "learning_rate": 5.748160249512212e-06, + "loss": 0.4548, "step": 6604 }, { - "epoch": 0.7, - "grad_norm": 2.8696482572875674, - "learning_rate": 2.2472627455904086e-06, - "loss": 0.5519, + "epoch": 0.47, + "grad_norm": 1.6624628453546941, + "learning_rate": 5.747024000403488e-06, + "loss": 0.528, "step": 6605 }, { - "epoch": 0.7, - "grad_norm": 2.844232232179864, - "learning_rate": 2.2458402008927578e-06, - "loss": 0.6271, + "epoch": 0.47, + "grad_norm": 2.13237973219662, + "learning_rate": 5.7458877118344235e-06, + "loss": 0.5474, "step": 6606 }, { - "epoch": 0.7, - "grad_norm": 2.404083137621484, - "learning_rate": 2.24441797616543e-06, - "loss": 0.5466, + "epoch": 0.47, + "grad_norm": 1.544974271944382, + "learning_rate": 5.744751383865043e-06, + "loss": 0.5578, "step": 6607 }, { - "epoch": 0.7, - "grad_norm": 2.567530863025546, - "learning_rate": 2.2429960715736588e-06, - "loss": 0.5551, + "epoch": 0.47, + "grad_norm": 3.155690954607798, + "learning_rate": 5.743615016555373e-06, + "loss": 0.507, "step": 6608 }, { - "epoch": 0.7, - "grad_norm": 2.2882668446276555, - "learning_rate": 2.241574487282634e-06, - "loss": 0.6007, + "epoch": 0.47, + "grad_norm": 1.6088632694038116, + "learning_rate": 5.742478609965435e-06, + "loss": 0.549, "step": 6609 }, { - "epoch": 0.7, - "grad_norm": 2.110230304278789, - "learning_rate": 2.24015322345751e-06, - "loss": 0.6316, + "epoch": 0.47, + "grad_norm": 1.612726715281011, + "learning_rate": 5.741342164155263e-06, + "loss": 0.4767, "step": 6610 }, { - "epoch": 0.7, - "grad_norm": 56.41508535977514, - "learning_rate": 2.2387322802634065e-06, - "loss": 0.6579, + "epoch": 0.47, + "grad_norm": 2.288136149941527, + "learning_rate": 5.740205679184885e-06, + "loss": 0.5241, "step": 6611 }, { - "epoch": 0.7, - "grad_norm": 2.2521890735234713, - "learning_rate": 2.2373116578654042e-06, - "loss": 0.6235, + "epoch": 0.47, + "grad_norm": 1.7465710402057408, + "learning_rate": 5.739069155114335e-06, + "loss": 0.5036, "step": 6612 }, { - "epoch": 0.7, - "grad_norm": 3.234967262280706, - "learning_rate": 2.2358913564285496e-06, - "loss": 0.5794, + "epoch": 0.47, + "grad_norm": 2.958629777481797, + "learning_rate": 5.7379325920036475e-06, + "loss": 0.5413, "step": 6613 }, { - "epoch": 0.7, - "grad_norm": 2.976223938280554, - "learning_rate": 2.234471376117847e-06, - "loss": 0.6562, + "epoch": 0.47, + "grad_norm": 1.6609878250657621, + "learning_rate": 5.736795989912861e-06, + "loss": 0.5238, "step": 6614 }, { - "epoch": 0.7, - "grad_norm": 2.5700256793799388, - "learning_rate": 2.2330517170982634e-06, - "loss": 0.5574, + "epoch": 0.47, + "grad_norm": 2.0560669417258426, + "learning_rate": 5.735659348902013e-06, + "loss": 0.5869, "step": 6615 }, { - "epoch": 0.7, - "grad_norm": 3.3477002796845468, - "learning_rate": 2.2316323795347334e-06, - "loss": 0.6504, + "epoch": 0.47, + "grad_norm": 1.532075424044709, + "learning_rate": 5.734522669031146e-06, + "loss": 0.4669, "step": 6616 }, { - "epoch": 0.7, - "grad_norm": 2.155545624420643, - "learning_rate": 2.2302133635921524e-06, - "loss": 0.6096, + "epoch": 0.47, + "grad_norm": 1.781752735091126, + "learning_rate": 5.733385950360302e-06, + "loss": 0.5607, "step": 6617 }, { - "epoch": 0.7, - "grad_norm": 2.271504833154178, - "learning_rate": 2.2287946694353764e-06, - "loss": 0.6292, + "epoch": 0.47, + "grad_norm": 1.6572238765680563, + "learning_rate": 5.732249192949527e-06, + "loss": 0.5169, "step": 6618 }, { - "epoch": 0.7, - "grad_norm": 1.9615477986067251, - "learning_rate": 2.2273762972292227e-06, - "loss": 0.5254, + "epoch": 0.47, + "grad_norm": 2.1189460427642324, + "learning_rate": 5.731112396858869e-06, + "loss": 0.6012, "step": 6619 }, { - "epoch": 0.7, - "grad_norm": 2.347528126787625, - "learning_rate": 2.2259582471384765e-06, - "loss": 0.5771, + "epoch": 0.47, + "grad_norm": 1.7317856924250314, + "learning_rate": 5.7299755621483776e-06, + "loss": 0.564, "step": 6620 }, { - "epoch": 0.7, - "grad_norm": 2.0790518781036624, - "learning_rate": 2.224540519327884e-06, - "loss": 0.5152, + "epoch": 0.47, + "grad_norm": 1.9411395948074164, + "learning_rate": 5.728838688878102e-06, + "loss": 0.5716, "step": 6621 }, { - "epoch": 0.7, - "grad_norm": 2.4932893443972515, - "learning_rate": 2.2231231139621505e-06, - "loss": 0.6342, + "epoch": 0.47, + "grad_norm": 1.6536454088791743, + "learning_rate": 5.727701777108098e-06, + "loss": 0.5846, "step": 6622 }, { - "epoch": 0.7, - "grad_norm": 2.40013914291326, - "learning_rate": 2.2217060312059453e-06, - "loss": 0.6919, + "epoch": 0.47, + "grad_norm": 2.0199823398020422, + "learning_rate": 5.7265648268984195e-06, + "loss": 0.5287, "step": 6623 }, { - "epoch": 0.7, - "grad_norm": 3.7257495545768244, - "learning_rate": 2.2202892712239016e-06, - "loss": 0.6643, + "epoch": 0.47, + "grad_norm": 1.875344510428827, + "learning_rate": 5.725427838309125e-06, + "loss": 0.6027, "step": 6624 }, { - "epoch": 0.7, - "grad_norm": 3.029296071523241, - "learning_rate": 2.2188728341806153e-06, - "loss": 0.5941, + "epoch": 0.47, + "grad_norm": 1.8531987267296437, + "learning_rate": 5.724290811400276e-06, + "loss": 0.5658, "step": 6625 }, { - "epoch": 0.7, - "grad_norm": 2.587522142486169, - "learning_rate": 2.2174567202406455e-06, - "loss": 0.689, + "epoch": 0.47, + "grad_norm": 1.6631067710312177, + "learning_rate": 5.7231537462319306e-06, + "loss": 0.5973, "step": 6626 }, { - "epoch": 0.7, - "grad_norm": 3.102337698108396, - "learning_rate": 2.2160409295685105e-06, - "loss": 0.6263, + "epoch": 0.47, + "grad_norm": 1.4982126379101766, + "learning_rate": 5.722016642864154e-06, + "loss": 0.5478, "step": 6627 }, { - "epoch": 0.7, - "grad_norm": 2.442746996525038, - "learning_rate": 2.2146254623286905e-06, - "loss": 0.6833, + "epoch": 0.47, + "grad_norm": 1.5298938964956168, + "learning_rate": 5.720879501357011e-06, + "loss": 0.5098, "step": 6628 }, { - "epoch": 0.7, - "grad_norm": 3.678411820711548, - "learning_rate": 2.213210318685633e-06, - "loss": 0.6105, + "epoch": 0.47, + "grad_norm": 1.5479555871060233, + "learning_rate": 5.7197423217705714e-06, + "loss": 0.6086, "step": 6629 }, { - "epoch": 0.7, - "grad_norm": 2.314390976374499, - "learning_rate": 2.2117954988037467e-06, - "loss": 0.6826, + "epoch": 0.47, + "grad_norm": 1.745614172295436, + "learning_rate": 5.7186051041649026e-06, + "loss": 0.5309, "step": 6630 }, { - "epoch": 0.7, - "grad_norm": 2.3034289358297007, - "learning_rate": 2.210381002847399e-06, - "loss": 0.6343, + "epoch": 0.47, + "grad_norm": 1.5024643555518982, + "learning_rate": 5.717467848600078e-06, + "loss": 0.5176, "step": 6631 }, { - "epoch": 0.7, - "grad_norm": 2.9230039460865647, - "learning_rate": 2.208966830980921e-06, - "loss": 0.629, + "epoch": 0.47, + "grad_norm": 1.6087249159044514, + "learning_rate": 5.7163305551361705e-06, + "loss": 0.556, "step": 6632 }, { - "epoch": 0.7, - "grad_norm": 2.215059832781754, - "learning_rate": 2.207552983368608e-06, - "loss": 0.5855, + "epoch": 0.47, + "grad_norm": 1.578641439905476, + "learning_rate": 5.7151932238332556e-06, + "loss": 0.5562, "step": 6633 }, { - "epoch": 0.7, - "grad_norm": 3.733877578923876, - "learning_rate": 2.20613946017472e-06, - "loss": 0.561, + "epoch": 0.47, + "grad_norm": 2.28683546376332, + "learning_rate": 5.71405585475141e-06, + "loss": 0.5538, "step": 6634 }, { - "epoch": 0.7, - "grad_norm": 5.184821592844948, - "learning_rate": 2.2047262615634723e-06, - "loss": 0.5693, + "epoch": 0.47, + "grad_norm": 1.7231499120955505, + "learning_rate": 5.712918447950716e-06, + "loss": 0.5842, "step": 6635 }, { - "epoch": 0.7, - "grad_norm": 2.3808532135782983, - "learning_rate": 2.203313387699046e-06, - "loss": 0.61, + "epoch": 0.47, + "grad_norm": 1.4754039808961341, + "learning_rate": 5.711781003491254e-06, + "loss": 0.4663, "step": 6636 }, { - "epoch": 0.7, - "grad_norm": 9.185810849417573, - "learning_rate": 2.201900838745586e-06, - "loss": 0.6877, + "epoch": 0.47, + "grad_norm": 1.9479050511366132, + "learning_rate": 5.710643521433107e-06, + "loss": 0.4729, "step": 6637 }, { - "epoch": 0.7, - "grad_norm": 2.901166357224412, - "learning_rate": 2.2004886148671978e-06, - "loss": 0.6226, + "epoch": 0.47, + "grad_norm": 1.735796109039984, + "learning_rate": 5.709506001836361e-06, + "loss": 0.5188, "step": 6638 }, { - "epoch": 0.7, - "grad_norm": 2.8487030064529053, - "learning_rate": 2.1990767162279515e-06, - "loss": 0.5769, + "epoch": 0.47, + "grad_norm": 1.7540777761182584, + "learning_rate": 5.708368444761105e-06, + "loss": 0.6254, "step": 6639 }, { - "epoch": 0.7, - "grad_norm": 0.9023289850683952, - "learning_rate": 2.197665142991876e-06, - "loss": 0.4856, + "epoch": 0.47, + "grad_norm": 0.7520759847319023, + "learning_rate": 5.707230850267427e-06, + "loss": 0.4399, "step": 6640 }, { - "epoch": 0.7, - "grad_norm": 2.343960901159718, - "learning_rate": 2.196253895322961e-06, - "loss": 0.5961, + "epoch": 0.47, + "grad_norm": 1.5699159567040197, + "learning_rate": 5.706093218415418e-06, + "loss": 0.5632, "step": 6641 }, { - "epoch": 0.7, - "grad_norm": 4.467203125378956, - "learning_rate": 2.1948429733851646e-06, - "loss": 0.6625, + "epoch": 0.47, + "grad_norm": 1.5737620587970302, + "learning_rate": 5.704955549265173e-06, + "loss": 0.5212, "step": 6642 }, { - "epoch": 0.7, - "grad_norm": 2.702936295244413, - "learning_rate": 2.193432377342404e-06, - "loss": 0.6413, + "epoch": 0.47, + "grad_norm": 1.5102354240979676, + "learning_rate": 5.703817842876786e-06, + "loss": 0.5176, "step": 6643 }, { - "epoch": 0.7, - "grad_norm": 2.8056618447753277, - "learning_rate": 2.1920221073585564e-06, - "loss": 0.5931, + "epoch": 0.47, + "grad_norm": 1.550836481535434, + "learning_rate": 5.702680099310359e-06, + "loss": 0.531, "step": 6644 }, { - "epoch": 0.7, - "grad_norm": 2.79010389004188, - "learning_rate": 2.190612163597462e-06, - "loss": 0.6772, + "epoch": 0.47, + "grad_norm": 1.797260942151853, + "learning_rate": 5.701542318625985e-06, + "loss": 0.5102, "step": 6645 }, { - "epoch": 0.7, - "grad_norm": 4.5825220168109295, - "learning_rate": 2.189202546222925e-06, - "loss": 0.6462, + "epoch": 0.47, + "grad_norm": 1.7630394726064567, + "learning_rate": 5.700404500883772e-06, + "loss": 0.548, "step": 6646 }, { - "epoch": 0.7, - "grad_norm": 2.6022305800555974, - "learning_rate": 2.1877932553987114e-06, - "loss": 0.6701, + "epoch": 0.47, + "grad_norm": 2.5566499576517447, + "learning_rate": 5.699266646143817e-06, + "loss": 0.53, "step": 6647 }, { - "epoch": 0.7, - "grad_norm": 2.1361674026822683, - "learning_rate": 2.1863842912885496e-06, - "loss": 0.569, + "epoch": 0.47, + "grad_norm": 1.476067404257678, + "learning_rate": 5.6981287544662286e-06, + "loss": 0.5643, "step": 6648 }, { - "epoch": 0.7, - "grad_norm": 3.1167050044456364, - "learning_rate": 2.184975654056128e-06, - "loss": 0.745, + "epoch": 0.47, + "grad_norm": 3.980127810229455, + "learning_rate": 5.696990825911114e-06, + "loss": 0.5678, "step": 6649 }, { - "epoch": 0.7, - "grad_norm": 2.735863377596502, - "learning_rate": 2.183567343865095e-06, - "loss": 0.607, + "epoch": 0.47, + "grad_norm": 1.6917379651067455, + "learning_rate": 5.695852860538585e-06, + "loss": 0.5347, "step": 6650 }, { - "epoch": 0.7, - "grad_norm": 2.4462949583121554, - "learning_rate": 2.182159360879067e-06, - "loss": 0.587, + "epoch": 0.47, + "grad_norm": 1.8662038937587992, + "learning_rate": 5.694714858408746e-06, + "loss": 0.5441, "step": 6651 }, { - "epoch": 0.7, - "grad_norm": 1.9840779320355102, - "learning_rate": 2.1807517052616205e-06, - "loss": 0.5347, + "epoch": 0.47, + "grad_norm": 1.7485397760104502, + "learning_rate": 5.693576819581717e-06, + "loss": 0.4951, "step": 6652 }, { - "epoch": 0.7, - "grad_norm": 16.6142045279403, - "learning_rate": 2.1793443771762912e-06, - "loss": 0.5878, + "epoch": 0.47, + "grad_norm": 1.5040995838627809, + "learning_rate": 5.692438744117609e-06, + "loss": 0.526, "step": 6653 }, { - "epoch": 0.7, - "grad_norm": 2.1547249179603583, - "learning_rate": 2.177937376786577e-06, - "loss": 0.5679, + "epoch": 0.47, + "grad_norm": 1.6959696372402153, + "learning_rate": 5.691300632076541e-06, + "loss": 0.5168, "step": 6654 }, { - "epoch": 0.7, - "grad_norm": 3.7649378439066905, - "learning_rate": 2.17653070425594e-06, - "loss": 0.6205, + "epoch": 0.47, + "grad_norm": 0.7247403248877953, + "learning_rate": 5.6901624835186295e-06, + "loss": 0.4757, "step": 6655 }, { - "epoch": 0.7, - "grad_norm": 3.1627346207064804, - "learning_rate": 2.175124359747806e-06, - "loss": 0.653, + "epoch": 0.47, + "grad_norm": 1.548243095282225, + "learning_rate": 5.689024298503996e-06, + "loss": 0.5224, "step": 6656 }, { - "epoch": 0.7, - "grad_norm": 3.3916267839635585, - "learning_rate": 2.173718343425558e-06, - "loss": 0.5896, + "epoch": 0.47, + "grad_norm": 1.700848735867785, + "learning_rate": 5.687886077092768e-06, + "loss": 0.5178, "step": 6657 }, { - "epoch": 0.7, - "grad_norm": 3.269804794003583, - "learning_rate": 2.1723126554525415e-06, - "loss": 0.6527, + "epoch": 0.47, + "grad_norm": 0.7672379756173603, + "learning_rate": 5.686747819345064e-06, + "loss": 0.4344, "step": 6658 }, { - "epoch": 0.7, - "grad_norm": 1.0320795601481907, - "learning_rate": 2.1709072959920667e-06, - "loss": 0.525, + "epoch": 0.47, + "grad_norm": 1.9156117373252597, + "learning_rate": 5.685609525321015e-06, + "loss": 0.4879, "step": 6659 }, { - "epoch": 0.7, - "grad_norm": 2.3228826509557523, - "learning_rate": 2.169502265207404e-06, - "loss": 0.5739, + "epoch": 0.47, + "grad_norm": 2.1940886734271228, + "learning_rate": 5.684471195080746e-06, + "loss": 0.5469, "step": 6660 }, { - "epoch": 0.7, - "grad_norm": 2.952927663361331, - "learning_rate": 2.168097563261787e-06, - "loss": 0.6047, + "epoch": 0.47, + "grad_norm": 1.7734389924979084, + "learning_rate": 5.683332828684391e-06, + "loss": 0.5569, "step": 6661 }, { - "epoch": 0.7, - "grad_norm": 2.120080095383413, - "learning_rate": 2.1666931903184103e-06, - "loss": 0.5871, + "epoch": 0.47, + "grad_norm": 1.8535064391867826, + "learning_rate": 5.6821944261920794e-06, + "loss": 0.5555, "step": 6662 }, { - "epoch": 0.7, - "grad_norm": 2.141689567051116, - "learning_rate": 2.1652891465404257e-06, - "loss": 0.6272, + "epoch": 0.47, + "grad_norm": 2.054237461112791, + "learning_rate": 5.681055987663946e-06, + "loss": 0.5164, "step": 6663 }, { - "epoch": 0.7, - "grad_norm": 2.7913975017146035, - "learning_rate": 2.1638854320909542e-06, - "loss": 0.7013, + "epoch": 0.47, + "grad_norm": 1.6101718658838793, + "learning_rate": 5.679917513160128e-06, + "loss": 0.563, "step": 6664 }, { - "epoch": 0.7, - "grad_norm": 2.2400371485167927, - "learning_rate": 2.162482047133076e-06, - "loss": 0.56, + "epoch": 0.47, + "grad_norm": 0.7428838528857571, + "learning_rate": 5.678779002740764e-06, + "loss": 0.4424, "step": 6665 }, { - "epoch": 0.7, - "grad_norm": 3.3342661126733666, - "learning_rate": 2.161078991829832e-06, - "loss": 0.635, + "epoch": 0.47, + "grad_norm": 2.000729852809355, + "learning_rate": 5.677640456465992e-06, + "loss": 0.5954, "step": 6666 }, { - "epoch": 0.7, - "grad_norm": 3.536358295411925, - "learning_rate": 2.159676266344222e-06, - "loss": 0.6228, + "epoch": 0.47, + "grad_norm": 1.6723443851362405, + "learning_rate": 5.676501874395956e-06, + "loss": 0.4885, "step": 6667 }, { - "epoch": 0.7, - "grad_norm": 2.503851980349559, - "learning_rate": 2.1582738708392127e-06, - "loss": 0.5741, + "epoch": 0.47, + "grad_norm": 1.8813192666603997, + "learning_rate": 5.675363256590798e-06, + "loss": 0.5199, "step": 6668 }, { - "epoch": 0.7, - "grad_norm": 3.0980965236347804, - "learning_rate": 2.1568718054777322e-06, - "loss": 0.6151, + "epoch": 0.47, + "grad_norm": 1.5842149677278634, + "learning_rate": 5.674224603110665e-06, + "loss": 0.4859, "step": 6669 }, { - "epoch": 0.7, - "grad_norm": 3.792431692180165, - "learning_rate": 2.1554700704226673e-06, - "loss": 0.6017, + "epoch": 0.47, + "grad_norm": 2.1258460391581875, + "learning_rate": 5.673085914015701e-06, + "loss": 0.579, "step": 6670 }, { - "epoch": 0.7, - "grad_norm": 2.0346739479145444, - "learning_rate": 2.1540686658368643e-06, - "loss": 0.5778, + "epoch": 0.47, + "grad_norm": 2.071915758261809, + "learning_rate": 5.671947189366061e-06, + "loss": 0.5326, "step": 6671 }, { - "epoch": 0.7, - "grad_norm": 2.666257894353589, - "learning_rate": 2.1526675918831373e-06, - "loss": 0.6578, + "epoch": 0.47, + "grad_norm": 1.7858896533764734, + "learning_rate": 5.670808429221894e-06, + "loss": 0.5025, "step": 6672 }, { - "epoch": 0.7, - "grad_norm": 2.726228194370597, - "learning_rate": 2.151266848724259e-06, - "loss": 0.6432, + "epoch": 0.47, + "grad_norm": 1.630768100685377, + "learning_rate": 5.669669633643351e-06, + "loss": 0.5326, "step": 6673 }, { - "epoch": 0.7, - "grad_norm": 2.5487776785030176, - "learning_rate": 2.149866436522965e-06, - "loss": 0.6072, + "epoch": 0.47, + "grad_norm": 0.8313405851425825, + "learning_rate": 5.668530802690591e-06, + "loss": 0.4628, "step": 6674 }, { - "epoch": 0.7, - "grad_norm": 2.8073196930271074, - "learning_rate": 2.1484663554419495e-06, - "loss": 0.6205, + "epoch": 0.47, + "grad_norm": 1.8188713082885533, + "learning_rate": 5.667391936423767e-06, + "loss": 0.6095, "step": 6675 }, { - "epoch": 0.7, - "grad_norm": 2.2783150428762, - "learning_rate": 2.147066605643868e-06, - "loss": 0.6129, + "epoch": 0.47, + "grad_norm": 0.7410904978841278, + "learning_rate": 5.6662530349030396e-06, + "loss": 0.4404, "step": 6676 }, { - "epoch": 0.7, - "grad_norm": 6.203415894181792, - "learning_rate": 2.145667187291341e-06, - "loss": 0.6235, + "epoch": 0.47, + "grad_norm": 0.6797022392886681, + "learning_rate": 5.66511409818857e-06, + "loss": 0.4485, "step": 6677 }, { - "epoch": 0.7, - "grad_norm": 2.0439147089340683, - "learning_rate": 2.144268100546951e-06, - "loss": 0.5993, + "epoch": 0.47, + "grad_norm": 1.6991112340208845, + "learning_rate": 5.663975126340519e-06, + "loss": 0.5114, "step": 6678 }, { - "epoch": 0.7, - "grad_norm": 2.351141372515001, - "learning_rate": 2.1428693455732384e-06, - "loss": 0.5968, + "epoch": 0.47, + "grad_norm": 1.4949186231886575, + "learning_rate": 5.662836119419053e-06, + "loss": 0.5263, "step": 6679 }, { - "epoch": 0.7, - "grad_norm": 2.2933943463025726, - "learning_rate": 2.141470922532704e-06, - "loss": 0.6489, + "epoch": 0.47, + "grad_norm": 1.5968698878481151, + "learning_rate": 5.6616970774843375e-06, + "loss": 0.4368, "step": 6680 }, { - "epoch": 0.7, - "grad_norm": 2.3273415621641216, - "learning_rate": 2.140072831587815e-06, - "loss": 0.635, + "epoch": 0.47, + "grad_norm": 1.8765070184513248, + "learning_rate": 5.660558000596539e-06, + "loss": 0.5802, "step": 6681 }, { - "epoch": 0.7, - "grad_norm": 2.2821768908749736, - "learning_rate": 2.138675072900997e-06, - "loss": 0.5997, + "epoch": 0.47, + "grad_norm": 1.598715098987565, + "learning_rate": 5.659418888815829e-06, + "loss": 0.5267, "step": 6682 }, { - "epoch": 0.7, - "grad_norm": 3.089940278859609, - "learning_rate": 2.1372776466346414e-06, - "loss": 0.5724, + "epoch": 0.47, + "grad_norm": 1.7994963055067141, + "learning_rate": 5.658279742202379e-06, + "loss": 0.5439, "step": 6683 }, { - "epoch": 0.7, - "grad_norm": 2.251126225010119, - "learning_rate": 2.1358805529510896e-06, - "loss": 0.617, + "epoch": 0.47, + "grad_norm": 0.8434310508210648, + "learning_rate": 5.657140560816362e-06, + "loss": 0.4575, "step": 6684 }, { - "epoch": 0.7, - "grad_norm": 2.760541069813522, - "learning_rate": 2.134483792012656e-06, - "loss": 0.6546, + "epoch": 0.47, + "grad_norm": 2.471254865073264, + "learning_rate": 5.656001344717954e-06, + "loss": 0.6009, "step": 6685 }, { - "epoch": 0.7, - "grad_norm": 2.2370760853157, - "learning_rate": 2.1330873639816125e-06, - "loss": 0.5493, + "epoch": 0.47, + "grad_norm": 1.7711996708237348, + "learning_rate": 5.654862093967334e-06, + "loss": 0.5492, "step": 6686 }, { - "epoch": 0.7, - "grad_norm": 2.9935072593068037, - "learning_rate": 2.131691269020193e-06, - "loss": 0.556, + "epoch": 0.47, + "grad_norm": 1.71597050170929, + "learning_rate": 5.653722808624678e-06, + "loss": 0.6096, "step": 6687 }, { - "epoch": 0.7, - "grad_norm": 2.0100434248241155, - "learning_rate": 2.130295507290591e-06, - "loss": 0.6344, + "epoch": 0.47, + "grad_norm": 2.5364878131373056, + "learning_rate": 5.652583488750166e-06, + "loss": 0.5212, "step": 6688 }, { - "epoch": 0.7, - "grad_norm": 2.244649889387862, - "learning_rate": 2.1289000789549586e-06, - "loss": 0.5579, + "epoch": 0.47, + "grad_norm": 1.8745468182164633, + "learning_rate": 5.6514441344039864e-06, + "loss": 0.5863, "step": 6689 }, { - "epoch": 0.7, - "grad_norm": 2.704981131283807, - "learning_rate": 2.1275049841754165e-06, - "loss": 0.5746, + "epoch": 0.47, + "grad_norm": 2.8602776126487477, + "learning_rate": 5.650304745646318e-06, + "loss": 0.6187, "step": 6690 }, { - "epoch": 0.7, - "grad_norm": 2.29539621168737, - "learning_rate": 2.126110223114043e-06, - "loss": 0.6104, + "epoch": 0.47, + "grad_norm": 1.3739225366926313, + "learning_rate": 5.649165322537351e-06, + "loss": 0.5463, "step": 6691 }, { - "epoch": 0.7, - "grad_norm": 2.205856297808691, - "learning_rate": 2.1247157959328763e-06, - "loss": 0.5662, + "epoch": 0.47, + "grad_norm": 1.8024861603560671, + "learning_rate": 5.64802586513727e-06, + "loss": 0.5755, "step": 6692 }, { - "epoch": 0.7, - "grad_norm": 0.8836298348060035, - "learning_rate": 2.1233217027939153e-06, - "loss": 0.562, + "epoch": 0.47, + "grad_norm": 1.5211624614073798, + "learning_rate": 5.646886373506267e-06, + "loss": 0.5807, "step": 6693 }, { - "epoch": 0.7, - "grad_norm": 2.3976000029984372, - "learning_rate": 2.121927943859123e-06, - "loss": 0.6278, + "epoch": 0.48, + "grad_norm": 1.4775770558716181, + "learning_rate": 5.645746847704535e-06, + "loss": 0.4892, "step": 6694 }, { - "epoch": 0.7, - "grad_norm": 2.331546227933603, - "learning_rate": 2.1205345192904224e-06, - "loss": 0.6523, + "epoch": 0.48, + "grad_norm": 1.948368889162901, + "learning_rate": 5.644607287792267e-06, + "loss": 0.5447, "step": 6695 }, { - "epoch": 0.7, - "grad_norm": 2.3605376267410083, - "learning_rate": 2.1191414292497e-06, - "loss": 0.6496, + "epoch": 0.48, + "grad_norm": 1.451366694437685, + "learning_rate": 5.643467693829656e-06, + "loss": 0.5134, "step": 6696 }, { - "epoch": 0.7, - "grad_norm": 2.102974401678194, - "learning_rate": 2.1177486738987984e-06, - "loss": 0.6491, + "epoch": 0.48, + "grad_norm": 1.562089884567449, + "learning_rate": 5.642328065876903e-06, + "loss": 0.5572, "step": 6697 }, { - "epoch": 0.7, - "grad_norm": 2.26075427680184, - "learning_rate": 2.116356253399522e-06, - "loss": 0.5821, + "epoch": 0.48, + "grad_norm": 1.643358385293917, + "learning_rate": 5.641188403994205e-06, + "loss": 0.5395, "step": 6698 }, { - "epoch": 0.7, - "grad_norm": 0.8908330043379147, - "learning_rate": 2.114964167913641e-06, - "loss": 0.537, + "epoch": 0.48, + "grad_norm": 1.526710648041879, + "learning_rate": 5.640048708241761e-06, + "loss": 0.5506, "step": 6699 }, { - "epoch": 0.71, - "grad_norm": 2.579961013538668, - "learning_rate": 2.1135724176028844e-06, - "loss": 0.5184, + "epoch": 0.48, + "grad_norm": 1.4821416483061174, + "learning_rate": 5.638908978679776e-06, + "loss": 0.5356, "step": 6700 }, { - "epoch": 0.71, - "grad_norm": 1.08377361893251, - "learning_rate": 2.1121810026289404e-06, - "loss": 0.544, + "epoch": 0.48, + "grad_norm": 1.7516470699889475, + "learning_rate": 5.6377692153684545e-06, + "loss": 0.5504, "step": 6701 }, { - "epoch": 0.71, - "grad_norm": 6.241375532358382, - "learning_rate": 2.110789923153458e-06, - "loss": 0.6789, + "epoch": 0.48, + "grad_norm": 1.661343665489507, + "learning_rate": 5.636629418368001e-06, + "loss": 0.5706, "step": 6702 }, { - "epoch": 0.71, - "grad_norm": 2.9160714063650266, - "learning_rate": 2.109399179338051e-06, - "loss": 0.5792, + "epoch": 0.48, + "grad_norm": 1.81514120744629, + "learning_rate": 5.635489587738626e-06, + "loss": 0.5577, "step": 6703 }, { - "epoch": 0.71, - "grad_norm": 2.1537715668446755, - "learning_rate": 2.1080087713442928e-06, - "loss": 0.5489, + "epoch": 0.48, + "grad_norm": 0.7722265479031758, + "learning_rate": 5.634349723540536e-06, + "loss": 0.4694, "step": 6704 }, { - "epoch": 0.71, - "grad_norm": 2.053631859586391, - "learning_rate": 2.1066186993337158e-06, - "loss": 0.5573, + "epoch": 0.48, + "grad_norm": 1.7800201203746095, + "learning_rate": 5.6332098258339455e-06, + "loss": 0.5434, "step": 6705 }, { - "epoch": 0.71, - "grad_norm": 5.425494797474714, - "learning_rate": 2.105228963467812e-06, - "loss": 0.5586, + "epoch": 0.48, + "grad_norm": 3.495232116267956, + "learning_rate": 5.632069894679066e-06, + "loss": 0.5621, "step": 6706 }, { - "epoch": 0.71, - "grad_norm": 2.748837747672611, - "learning_rate": 2.10383956390804e-06, - "loss": 0.5483, + "epoch": 0.48, + "grad_norm": 1.5695256463797158, + "learning_rate": 5.630929930136113e-06, + "loss": 0.5224, "step": 6707 }, { - "epoch": 0.71, - "grad_norm": 2.2002541684184727, - "learning_rate": 2.1024505008158153e-06, - "loss": 0.6831, + "epoch": 0.48, + "grad_norm": 2.003190247778446, + "learning_rate": 5.629789932265303e-06, + "loss": 0.5658, "step": 6708 }, { - "epoch": 0.71, - "grad_norm": 2.309736548355432, - "learning_rate": 2.101061774352517e-06, - "loss": 0.5418, + "epoch": 0.48, + "grad_norm": 1.4172815957643492, + "learning_rate": 5.628649901126854e-06, + "loss": 0.5272, "step": 6709 }, { - "epoch": 0.71, - "grad_norm": 2.4178982369090267, - "learning_rate": 2.099673384679482e-06, - "loss": 0.5158, + "epoch": 0.48, + "grad_norm": 1.5351502237291679, + "learning_rate": 5.627509836780988e-06, + "loss": 0.5586, "step": 6710 }, { - "epoch": 0.71, - "grad_norm": 2.18817854947126, - "learning_rate": 2.0982853319580075e-06, - "loss": 0.5532, + "epoch": 0.48, + "grad_norm": 1.9028130184144265, + "learning_rate": 5.626369739287926e-06, + "loss": 0.5328, "step": 6711 }, { - "epoch": 0.71, - "grad_norm": 15.098923040125959, - "learning_rate": 2.096897616349355e-06, - "loss": 0.6052, + "epoch": 0.48, + "grad_norm": 1.6026336373450234, + "learning_rate": 5.6252296087078915e-06, + "loss": 0.5241, "step": 6712 }, { - "epoch": 0.71, - "grad_norm": 4.248992003023729, - "learning_rate": 2.0955102380147474e-06, - "loss": 0.6149, + "epoch": 0.48, + "grad_norm": 1.4796314558332737, + "learning_rate": 5.624089445101111e-06, + "loss": 0.4669, "step": 6713 }, { - "epoch": 0.71, - "grad_norm": 5.785961960691239, - "learning_rate": 2.0941231971153644e-06, - "loss": 0.6334, + "epoch": 0.48, + "grad_norm": 1.634657431186811, + "learning_rate": 5.6229492485278115e-06, + "loss": 0.6167, "step": 6714 }, { - "epoch": 0.71, - "grad_norm": 2.878088474717053, - "learning_rate": 2.0927364938123457e-06, - "loss": 0.6535, + "epoch": 0.48, + "grad_norm": 1.9248752977447796, + "learning_rate": 5.6218090190482215e-06, + "loss": 0.554, "step": 6715 }, { - "epoch": 0.71, - "grad_norm": 2.4454378764029507, - "learning_rate": 2.0913501282667975e-06, - "loss": 0.6169, + "epoch": 0.48, + "grad_norm": 1.5093216633969484, + "learning_rate": 5.620668756722572e-06, + "loss": 0.6172, "step": 6716 }, { - "epoch": 0.71, - "grad_norm": 3.1028098404336815, - "learning_rate": 2.0899641006397836e-06, - "loss": 0.6849, + "epoch": 0.48, + "grad_norm": 1.7937289011555313, + "learning_rate": 5.619528461611095e-06, + "loss": 0.5091, "step": 6717 }, { - "epoch": 0.71, - "grad_norm": 2.5675573992295457, - "learning_rate": 2.0885784110923325e-06, - "loss": 0.6658, + "epoch": 0.48, + "grad_norm": 1.9766599509201979, + "learning_rate": 5.6183881337740275e-06, + "loss": 0.5379, "step": 6718 }, { - "epoch": 0.71, - "grad_norm": 3.0159340806610255, - "learning_rate": 2.087193059785421e-06, - "loss": 0.6471, + "epoch": 0.48, + "grad_norm": 1.7876569669256506, + "learning_rate": 5.617247773271602e-06, + "loss": 0.5427, "step": 6719 }, { - "epoch": 0.71, - "grad_norm": 2.879084843294698, - "learning_rate": 2.08580804688e-06, - "loss": 0.5826, + "epoch": 0.48, + "grad_norm": 1.7822541035915287, + "learning_rate": 5.616107380164056e-06, + "loss": 0.5211, "step": 6720 }, { - "epoch": 0.71, - "grad_norm": 2.7524142612645415, - "learning_rate": 2.084423372536976e-06, - "loss": 0.6057, + "epoch": 0.48, + "grad_norm": 2.226603301310994, + "learning_rate": 5.614966954511634e-06, + "loss": 0.6159, "step": 6721 }, { - "epoch": 0.71, - "grad_norm": 3.2595597192294785, - "learning_rate": 2.083039036917219e-06, - "loss": 0.6349, + "epoch": 0.48, + "grad_norm": 1.6247178116912122, + "learning_rate": 5.613826496374571e-06, + "loss": 0.5106, "step": 6722 }, { - "epoch": 0.71, - "grad_norm": 3.6944542500831257, - "learning_rate": 2.0816550401815538e-06, - "loss": 0.5963, + "epoch": 0.48, + "grad_norm": 1.5369539080308863, + "learning_rate": 5.612686005813113e-06, + "loss": 0.5685, "step": 6723 }, { - "epoch": 0.71, - "grad_norm": 2.1565697221035833, - "learning_rate": 2.0802713824907683e-06, - "loss": 0.5843, + "epoch": 0.48, + "grad_norm": 0.8021381772109202, + "learning_rate": 5.611545482887504e-06, + "loss": 0.4405, "step": 6724 }, { - "epoch": 0.71, - "grad_norm": 2.4041247947594564, - "learning_rate": 2.0788880640056137e-06, - "loss": 0.5929, + "epoch": 0.48, + "grad_norm": 2.1938146792378173, + "learning_rate": 5.610404927657992e-06, + "loss": 0.5226, "step": 6725 }, { - "epoch": 0.71, - "grad_norm": 2.6733679379720794, - "learning_rate": 2.077505084886802e-06, - "loss": 0.6643, + "epoch": 0.48, + "grad_norm": 1.9030832343107824, + "learning_rate": 5.609264340184822e-06, + "loss": 0.5453, "step": 6726 }, { - "epoch": 0.71, - "grad_norm": 2.2387112348073126, - "learning_rate": 2.0761224452950003e-06, - "loss": 0.6349, + "epoch": 0.48, + "grad_norm": 1.7832316370095276, + "learning_rate": 5.6081237205282455e-06, + "loss": 0.5148, "step": 6727 }, { - "epoch": 0.71, - "grad_norm": 2.8590997656724713, - "learning_rate": 2.07474014539084e-06, - "loss": 0.6274, + "epoch": 0.48, + "grad_norm": 1.902862943683918, + "learning_rate": 5.6069830687485135e-06, + "loss": 0.5383, "step": 6728 }, { - "epoch": 0.71, - "grad_norm": 2.6204018448125233, - "learning_rate": 2.0733581853349128e-06, - "loss": 0.699, + "epoch": 0.48, + "grad_norm": 1.6308617272819366, + "learning_rate": 5.605842384905878e-06, + "loss": 0.5673, "step": 6729 }, { - "epoch": 0.71, - "grad_norm": 3.7073322849788988, - "learning_rate": 2.071976565287772e-06, - "loss": 0.553, + "epoch": 0.48, + "grad_norm": 1.8887258072034738, + "learning_rate": 5.604701669060596e-06, + "loss": 0.5082, "step": 6730 }, { - "epoch": 0.71, - "grad_norm": 2.0723138136947097, - "learning_rate": 2.0705952854099337e-06, - "loss": 0.5749, + "epoch": 0.48, + "grad_norm": 1.6925602926813255, + "learning_rate": 5.603560921272923e-06, + "loss": 0.5271, "step": 6731 }, { - "epoch": 0.71, - "grad_norm": 2.9024005003511815, - "learning_rate": 2.069214345861863e-06, - "loss": 0.5859, + "epoch": 0.48, + "grad_norm": 2.233038556947561, + "learning_rate": 5.602420141603116e-06, + "loss": 0.6031, "step": 6732 }, { - "epoch": 0.71, - "grad_norm": 3.007051839789599, - "learning_rate": 2.067833746803998e-06, - "loss": 0.5448, + "epoch": 0.48, + "grad_norm": 1.7498356445548902, + "learning_rate": 5.601279330111438e-06, + "loss": 0.547, "step": 6733 }, { - "epoch": 0.71, - "grad_norm": 2.16193794853113, - "learning_rate": 2.0664534883967315e-06, - "loss": 0.6046, + "epoch": 0.48, + "grad_norm": 1.5183404627740058, + "learning_rate": 5.600138486858147e-06, + "loss": 0.5039, "step": 6734 }, { - "epoch": 0.71, - "grad_norm": 2.0022011347301887, - "learning_rate": 2.065073570800421e-06, - "loss": 0.6004, + "epoch": 0.48, + "grad_norm": 1.8864256386492217, + "learning_rate": 5.598997611903507e-06, + "loss": 0.6072, "step": 6735 }, { - "epoch": 0.71, - "grad_norm": 2.5866230804925356, - "learning_rate": 2.0636939941753793e-06, - "loss": 0.6142, + "epoch": 0.48, + "grad_norm": 1.7563920701860751, + "learning_rate": 5.597856705307782e-06, + "loss": 0.5296, "step": 6736 }, { - "epoch": 0.71, - "grad_norm": 0.9455346482011558, - "learning_rate": 2.0623147586818786e-06, - "loss": 0.5795, + "epoch": 0.48, + "grad_norm": 4.9910982831742405, + "learning_rate": 5.596715767131242e-06, + "loss": 0.5293, "step": 6737 }, { - "epoch": 0.71, - "grad_norm": 2.8544304813633348, - "learning_rate": 2.060935864480158e-06, - "loss": 0.6272, + "epoch": 0.48, + "grad_norm": 1.6921088304563325, + "learning_rate": 5.595574797434154e-06, + "loss": 0.6174, "step": 6738 }, { - "epoch": 0.71, - "grad_norm": 3.139954944371839, - "learning_rate": 2.0595573117304147e-06, - "loss": 0.5931, + "epoch": 0.48, + "grad_norm": 1.6876155591700581, + "learning_rate": 5.594433796276786e-06, + "loss": 0.4984, "step": 6739 }, { - "epoch": 0.71, - "grad_norm": 2.141121600735202, - "learning_rate": 2.0581791005928024e-06, - "loss": 0.5734, + "epoch": 0.48, + "grad_norm": 2.343848982639149, + "learning_rate": 5.593292763719411e-06, + "loss": 0.4824, "step": 6740 }, { - "epoch": 0.71, - "grad_norm": 2.1663356934725844, - "learning_rate": 2.0568012312274367e-06, - "loss": 0.4842, + "epoch": 0.48, + "grad_norm": 1.6408076631830786, + "learning_rate": 5.592151699822301e-06, + "loss": 0.5104, "step": 6741 }, { - "epoch": 0.71, - "grad_norm": 2.1593137328818948, - "learning_rate": 2.0554237037943966e-06, - "loss": 0.5317, + "epoch": 0.48, + "grad_norm": 1.7727616538646864, + "learning_rate": 5.591010604645733e-06, + "loss": 0.5472, "step": 6742 }, { - "epoch": 0.71, - "grad_norm": 4.225299678738348, - "learning_rate": 2.054046518453718e-06, - "loss": 0.6648, + "epoch": 0.48, + "grad_norm": 1.6048311605043697, + "learning_rate": 5.589869478249983e-06, + "loss": 0.5722, "step": 6743 }, { - "epoch": 0.71, - "grad_norm": 2.46890585145027, - "learning_rate": 2.0526696753654008e-06, - "loss": 0.648, + "epoch": 0.48, + "grad_norm": 1.7017395453743611, + "learning_rate": 5.588728320695326e-06, + "loss": 0.6008, "step": 6744 }, { - "epoch": 0.71, - "grad_norm": 8.384393534696619, - "learning_rate": 2.051293174689401e-06, - "loss": 0.6517, + "epoch": 0.48, + "grad_norm": 1.6883454991776168, + "learning_rate": 5.587587132042045e-06, + "loss": 0.5387, "step": 6745 }, { - "epoch": 0.71, - "grad_norm": 2.3553993298810063, - "learning_rate": 2.0499170165856343e-06, - "loss": 0.5796, + "epoch": 0.48, + "grad_norm": 0.7678308716911999, + "learning_rate": 5.586445912350422e-06, + "loss": 0.4691, "step": 6746 }, { - "epoch": 0.71, - "grad_norm": 3.2778199669618373, - "learning_rate": 2.048541201213981e-06, - "loss": 0.6517, + "epoch": 0.48, + "grad_norm": 1.834197528848065, + "learning_rate": 5.585304661680737e-06, + "loss": 0.5729, "step": 6747 }, { - "epoch": 0.71, - "grad_norm": 2.1601593601154647, - "learning_rate": 2.0471657287342813e-06, - "loss": 0.598, + "epoch": 0.48, + "grad_norm": 1.5884467802179805, + "learning_rate": 5.584163380093278e-06, + "loss": 0.4832, "step": 6748 }, { - "epoch": 0.71, - "grad_norm": 2.7950345405844166, - "learning_rate": 2.0457905993063306e-06, - "loss": 0.6188, + "epoch": 0.48, + "grad_norm": 0.7336702916786736, + "learning_rate": 5.5830220676483295e-06, + "loss": 0.4558, "step": 6749 }, { - "epoch": 0.71, - "grad_norm": 2.3229400348434717, - "learning_rate": 2.044415813089887e-06, + "epoch": 0.48, + "grad_norm": 1.595138903239873, + "learning_rate": 5.581880724406179e-06, "loss": 0.5894, "step": 6750 }, { - "epoch": 0.71, - "grad_norm": 2.5323121960930086, - "learning_rate": 2.0430413702446707e-06, - "loss": 0.6715, + "epoch": 0.48, + "grad_norm": 1.7617260242785975, + "learning_rate": 5.580739350427117e-06, + "loss": 0.5848, "step": 6751 }, { - "epoch": 0.71, - "grad_norm": 2.1152568643865597, - "learning_rate": 2.0416672709303597e-06, - "loss": 0.591, + "epoch": 0.48, + "grad_norm": 1.62754895196623, + "learning_rate": 5.579597945771435e-06, + "loss": 0.5621, "step": 6752 }, { - "epoch": 0.71, - "grad_norm": 3.241190616242863, - "learning_rate": 2.0402935153065976e-06, - "loss": 0.5456, + "epoch": 0.48, + "grad_norm": 1.6425514376228831, + "learning_rate": 5.578456510499426e-06, + "loss": 0.5657, "step": 6753 }, { - "epoch": 0.71, - "grad_norm": 2.8952230145025495, - "learning_rate": 2.0389201035329754e-06, - "loss": 0.6587, + "epoch": 0.48, + "grad_norm": 1.8205366874940738, + "learning_rate": 5.577315044671383e-06, + "loss": 0.4811, "step": 6754 }, { - "epoch": 0.71, - "grad_norm": 4.913524657839511, - "learning_rate": 2.0375470357690564e-06, - "loss": 0.6103, + "epoch": 0.48, + "grad_norm": 1.796036794777471, + "learning_rate": 5.576173548347604e-06, + "loss": 0.5634, "step": 6755 }, { - "epoch": 0.71, - "grad_norm": 3.357411676416585, - "learning_rate": 2.036174312174359e-06, - "loss": 0.6013, + "epoch": 0.48, + "grad_norm": 4.822329382621104, + "learning_rate": 5.5750320215883855e-06, + "loss": 0.5198, "step": 6756 }, { - "epoch": 0.71, - "grad_norm": 2.164348116661981, - "learning_rate": 2.034801932908364e-06, - "loss": 0.641, + "epoch": 0.48, + "grad_norm": 1.6373306573904345, + "learning_rate": 5.5738904644540285e-06, + "loss": 0.6103, "step": 6757 }, { - "epoch": 0.71, - "grad_norm": 2.285714519634651, - "learning_rate": 2.033429898130509e-06, - "loss": 0.5707, + "epoch": 0.48, + "grad_norm": 1.416933995385322, + "learning_rate": 5.572748877004831e-06, + "loss": 0.557, "step": 6758 }, { - "epoch": 0.71, - "grad_norm": 2.314588495421655, - "learning_rate": 2.032058208000191e-06, - "loss": 0.6353, + "epoch": 0.48, + "grad_norm": 1.722279640683044, + "learning_rate": 5.571607259301097e-06, + "loss": 0.5793, "step": 6759 }, { - "epoch": 0.71, - "grad_norm": 3.1963968095024904, - "learning_rate": 2.030686862676771e-06, - "loss": 0.6579, + "epoch": 0.48, + "grad_norm": 1.4293868683625621, + "learning_rate": 5.570465611403131e-06, + "loss": 0.5161, "step": 6760 }, { - "epoch": 0.71, - "grad_norm": 2.234190092875219, - "learning_rate": 2.0293158623195702e-06, - "loss": 0.6601, + "epoch": 0.48, + "grad_norm": 1.7016478609618106, + "learning_rate": 5.569323933371239e-06, + "loss": 0.5731, "step": 6761 }, { - "epoch": 0.71, - "grad_norm": 2.350692178674305, - "learning_rate": 2.0279452070878647e-06, - "loss": 0.6188, + "epoch": 0.48, + "grad_norm": 1.869296411801194, + "learning_rate": 5.568182225265727e-06, + "loss": 0.549, "step": 6762 }, { - "epoch": 0.71, - "grad_norm": 2.564102840538574, - "learning_rate": 2.026574897140892e-06, - "loss": 0.6149, + "epoch": 0.48, + "grad_norm": 0.707212949528794, + "learning_rate": 5.567040487146905e-06, + "loss": 0.4528, "step": 6763 }, { - "epoch": 0.71, - "grad_norm": 2.174466257356336, - "learning_rate": 2.0252049326378524e-06, - "loss": 0.605, + "epoch": 0.48, + "grad_norm": 2.04319955771742, + "learning_rate": 5.565898719075083e-06, + "loss": 0.5866, "step": 6764 }, { - "epoch": 0.71, - "grad_norm": 2.149315755348158, - "learning_rate": 2.0238353137379047e-06, - "loss": 0.6217, + "epoch": 0.48, + "grad_norm": 1.5474852511178936, + "learning_rate": 5.564756921110572e-06, + "loss": 0.528, "step": 6765 }, { - "epoch": 0.71, - "grad_norm": 2.22159691006164, - "learning_rate": 2.02246604060017e-06, - "loss": 0.4916, + "epoch": 0.48, + "grad_norm": 1.665021536094839, + "learning_rate": 5.563615093313688e-06, + "loss": 0.5528, "step": 6766 }, { - "epoch": 0.71, - "grad_norm": 2.0623059295189003, - "learning_rate": 2.0210971133837208e-06, - "loss": 0.6159, + "epoch": 0.48, + "grad_norm": 1.6935142618410886, + "learning_rate": 5.562473235744745e-06, + "loss": 0.5782, "step": 6767 }, { - "epoch": 0.71, - "grad_norm": 5.244551913598113, - "learning_rate": 2.0197285322475975e-06, - "loss": 0.5866, + "epoch": 0.48, + "grad_norm": 1.8740807746791552, + "learning_rate": 5.56133134846406e-06, + "loss": 0.5431, "step": 6768 }, { - "epoch": 0.71, - "grad_norm": 3.429308309812128, - "learning_rate": 2.0183602973507977e-06, - "loss": 0.6469, + "epoch": 0.48, + "grad_norm": 1.5943006582090593, + "learning_rate": 5.56018943153195e-06, + "loss": 0.5007, "step": 6769 }, { - "epoch": 0.71, - "grad_norm": 2.2426310766438093, - "learning_rate": 2.016992408852282e-06, - "loss": 0.6122, + "epoch": 0.48, + "grad_norm": 1.4988623488863015, + "learning_rate": 5.559047485008737e-06, + "loss": 0.5419, "step": 6770 }, { - "epoch": 0.71, - "grad_norm": 3.147899821152736, - "learning_rate": 2.0156248669109645e-06, - "loss": 0.6084, + "epoch": 0.48, + "grad_norm": 1.5693618060041399, + "learning_rate": 5.5579055089547415e-06, + "loss": 0.581, "step": 6771 }, { - "epoch": 0.71, - "grad_norm": 2.5207952773245057, - "learning_rate": 2.014257671685722e-06, - "loss": 0.6181, + "epoch": 0.48, + "grad_norm": 1.6730799642614054, + "learning_rate": 5.556763503430287e-06, + "loss": 0.5033, "step": 6772 }, { - "epoch": 0.71, - "grad_norm": 2.290492088782644, - "learning_rate": 2.012890823335392e-06, - "loss": 0.6861, + "epoch": 0.48, + "grad_norm": 1.498046471523589, + "learning_rate": 5.5556214684956966e-06, + "loss": 0.4576, "step": 6773 }, { - "epoch": 0.71, - "grad_norm": 0.9788946060137566, - "learning_rate": 2.011524322018773e-06, - "loss": 0.5617, + "epoch": 0.48, + "grad_norm": 1.9530842819853285, + "learning_rate": 5.5544794042112985e-06, + "loss": 0.5607, "step": 6774 }, { - "epoch": 0.71, - "grad_norm": 2.02202729427753, - "learning_rate": 2.01015816789462e-06, - "loss": 0.5491, + "epoch": 0.48, + "grad_norm": 2.292845483940485, + "learning_rate": 5.5533373106374176e-06, + "loss": 0.5994, "step": 6775 }, { - "epoch": 0.71, - "grad_norm": 2.644055938189567, - "learning_rate": 2.0087923611216452e-06, - "loss": 0.6326, + "epoch": 0.48, + "grad_norm": 1.8969872555806475, + "learning_rate": 5.552195187834387e-06, + "loss": 0.5815, "step": 6776 }, { - "epoch": 0.71, - "grad_norm": 2.6229423897634088, - "learning_rate": 2.0074269018585286e-06, - "loss": 0.6593, + "epoch": 0.48, + "grad_norm": 1.6811661008158028, + "learning_rate": 5.551053035862535e-06, + "loss": 0.5071, "step": 6777 }, { - "epoch": 0.71, - "grad_norm": 2.657797931574321, - "learning_rate": 2.006061790263903e-06, - "loss": 0.6879, + "epoch": 0.48, + "grad_norm": 1.8156736446530535, + "learning_rate": 5.549910854782195e-06, + "loss": 0.5898, "step": 6778 }, { - "epoch": 0.71, - "grad_norm": 3.731539046733333, - "learning_rate": 2.004697026496366e-06, - "loss": 0.6025, + "epoch": 0.48, + "grad_norm": 1.7075229013231694, + "learning_rate": 5.548768644653699e-06, + "loss": 0.5374, "step": 6779 }, { - "epoch": 0.71, - "grad_norm": 2.3300238922256162, - "learning_rate": 2.00333261071447e-06, - "loss": 0.5737, + "epoch": 0.48, + "grad_norm": 1.6251682128346123, + "learning_rate": 5.547626405537384e-06, + "loss": 0.5579, "step": 6780 }, { - "epoch": 0.71, - "grad_norm": 2.5105007437944, - "learning_rate": 2.001968543076727e-06, - "loss": 0.5984, + "epoch": 0.48, + "grad_norm": 1.5516764058741341, + "learning_rate": 5.546484137493586e-06, + "loss": 0.5812, "step": 6781 }, { - "epoch": 0.71, - "grad_norm": 2.9355723240439238, - "learning_rate": 2.0006048237416127e-06, - "loss": 0.6853, + "epoch": 0.48, + "grad_norm": 2.1734460812534584, + "learning_rate": 5.545341840582645e-06, + "loss": 0.5461, "step": 6782 }, { - "epoch": 0.71, - "grad_norm": 2.7838842692244414, - "learning_rate": 1.9992414528675607e-06, - "loss": 0.5972, + "epoch": 0.48, + "grad_norm": 1.887477114593371, + "learning_rate": 5.544199514864901e-06, + "loss": 0.4825, "step": 6783 }, { - "epoch": 0.71, - "grad_norm": 2.409380507657977, - "learning_rate": 1.997878430612963e-06, - "loss": 0.6305, + "epoch": 0.48, + "grad_norm": 2.203025909311198, + "learning_rate": 5.543057160400693e-06, + "loss": 0.53, "step": 6784 }, { - "epoch": 0.71, - "grad_norm": 2.610091326027608, - "learning_rate": 1.9965157571361688e-06, - "loss": 0.6465, + "epoch": 0.48, + "grad_norm": 1.6514827601645161, + "learning_rate": 5.541914777250367e-06, + "loss": 0.5202, "step": 6785 }, { - "epoch": 0.71, - "grad_norm": 2.0465580485685355, - "learning_rate": 1.9951534325954913e-06, - "loss": 0.5599, + "epoch": 0.48, + "grad_norm": 0.8650873848282368, + "learning_rate": 5.540772365474265e-06, + "loss": 0.4424, "step": 6786 }, { - "epoch": 0.71, - "grad_norm": 2.7696890221497825, - "learning_rate": 1.9937914571492024e-06, - "loss": 0.5447, + "epoch": 0.48, + "grad_norm": 1.5945055039627944, + "learning_rate": 5.5396299251327355e-06, + "loss": 0.5389, "step": 6787 }, { - "epoch": 0.71, - "grad_norm": 2.854812905102868, - "learning_rate": 1.9924298309555355e-06, - "loss": 0.5373, + "epoch": 0.48, + "grad_norm": 1.7700858582581704, + "learning_rate": 5.538487456286123e-06, + "loss": 0.5853, "step": 6788 }, { - "epoch": 0.71, - "grad_norm": 4.144295935862878, - "learning_rate": 1.991068554172673e-06, - "loss": 0.5129, + "epoch": 0.48, + "grad_norm": 1.7301410694556476, + "learning_rate": 5.53734495899478e-06, + "loss": 0.614, "step": 6789 }, { - "epoch": 0.71, - "grad_norm": 2.235492207734396, - "learning_rate": 1.9897076269587686e-06, - "loss": 0.5211, + "epoch": 0.48, + "grad_norm": 1.9595533560651899, + "learning_rate": 5.536202433319055e-06, + "loss": 0.6245, "step": 6790 }, { - "epoch": 0.71, - "grad_norm": 1.0026420194820682, - "learning_rate": 1.98834704947193e-06, - "loss": 0.5368, + "epoch": 0.48, + "grad_norm": 1.935518135846989, + "learning_rate": 5.535059879319301e-06, + "loss": 0.5453, "step": 6791 }, { - "epoch": 0.71, - "grad_norm": 4.2741376437674825, - "learning_rate": 1.9869868218702266e-06, - "loss": 0.6746, + "epoch": 0.48, + "grad_norm": 1.5655703112249182, + "learning_rate": 5.533917297055871e-06, + "loss": 0.5285, "step": 6792 }, { - "epoch": 0.71, - "grad_norm": 2.4315638104365025, - "learning_rate": 1.985626944311685e-06, - "loss": 0.4932, + "epoch": 0.48, + "grad_norm": 1.9802205881281012, + "learning_rate": 5.53277468658912e-06, + "loss": 0.5937, "step": 6793 }, { - "epoch": 0.71, - "grad_norm": 3.005978231078567, - "learning_rate": 1.984267416954289e-06, - "loss": 0.694, + "epoch": 0.48, + "grad_norm": 1.8127084526071489, + "learning_rate": 5.531632047979405e-06, + "loss": 0.5657, "step": 6794 }, { - "epoch": 0.72, - "grad_norm": 0.9172999980041083, - "learning_rate": 1.9829082399559872e-06, - "loss": 0.5798, + "epoch": 0.48, + "grad_norm": 2.4063874677030097, + "learning_rate": 5.530489381287083e-06, + "loss": 0.5621, "step": 6795 }, { - "epoch": 0.72, - "grad_norm": 2.2051339131239667, - "learning_rate": 1.9815494134746866e-06, - "loss": 0.6441, + "epoch": 0.48, + "grad_norm": 1.6494133433579787, + "learning_rate": 5.5293466865725145e-06, + "loss": 0.5147, "step": 6796 }, { - "epoch": 0.72, - "grad_norm": 2.4405798502942733, - "learning_rate": 1.98019093766825e-06, - "loss": 0.6649, + "epoch": 0.48, + "grad_norm": 1.56398788795143, + "learning_rate": 5.528203963896062e-06, + "loss": 0.4594, "step": 6797 }, { - "epoch": 0.72, - "grad_norm": 2.505836213580318, - "learning_rate": 1.9788328126944984e-06, - "loss": 0.5017, + "epoch": 0.48, + "grad_norm": 1.6814529419823439, + "learning_rate": 5.527061213318084e-06, + "loss": 0.561, "step": 6798 }, { - "epoch": 0.72, - "grad_norm": 2.5383152998963965, - "learning_rate": 1.9774750387112176e-06, - "loss": 0.6945, + "epoch": 0.48, + "grad_norm": 1.404081593739727, + "learning_rate": 5.525918434898949e-06, + "loss": 0.4557, "step": 6799 }, { - "epoch": 0.72, - "grad_norm": 2.295043766361787, - "learning_rate": 1.976117615876149e-06, - "loss": 0.5951, + "epoch": 0.48, + "grad_norm": 1.7510986808564242, + "learning_rate": 5.524775628699018e-06, + "loss": 0.5587, "step": 6800 }, { - "epoch": 0.72, - "grad_norm": 3.0818322840049546, - "learning_rate": 1.974760544346999e-06, - "loss": 0.6048, + "epoch": 0.48, + "grad_norm": 1.675060987465966, + "learning_rate": 5.523632794778661e-06, + "loss": 0.5344, "step": 6801 }, { - "epoch": 0.72, - "grad_norm": 2.1718689507776063, - "learning_rate": 1.9734038242814203e-06, - "loss": 0.5392, + "epoch": 0.48, + "grad_norm": 2.5897658033206574, + "learning_rate": 5.522489933198246e-06, + "loss": 0.6444, "step": 6802 }, { - "epoch": 0.72, - "grad_norm": 4.5743079582613, - "learning_rate": 1.9720474558370356e-06, - "loss": 0.6438, + "epoch": 0.48, + "grad_norm": 1.6794618340678447, + "learning_rate": 5.521347044018142e-06, + "loss": 0.5263, "step": 6803 }, { - "epoch": 0.72, - "grad_norm": 2.4649750346849424, - "learning_rate": 1.970691439171425e-06, - "loss": 0.5826, + "epoch": 0.48, + "grad_norm": 0.7831654878341199, + "learning_rate": 5.520204127298721e-06, + "loss": 0.4393, "step": 6804 }, { - "epoch": 0.72, - "grad_norm": 2.251453377955877, - "learning_rate": 1.9693357744421282e-06, - "loss": 0.6129, + "epoch": 0.48, + "grad_norm": 1.7286817537015349, + "learning_rate": 5.519061183100354e-06, + "loss": 0.4908, "step": 6805 }, { - "epoch": 0.72, - "grad_norm": 2.7067846238307616, - "learning_rate": 1.96798046180664e-06, - "loss": 0.5928, + "epoch": 0.48, + "grad_norm": 1.7763016868135992, + "learning_rate": 5.517918211483418e-06, + "loss": 0.5308, "step": 6806 }, { - "epoch": 0.72, - "grad_norm": 2.9657749101823887, - "learning_rate": 1.966625501422415e-06, - "loss": 0.6502, + "epoch": 0.48, + "grad_norm": 1.6141177023558428, + "learning_rate": 5.516775212508286e-06, + "loss": 0.5285, "step": 6807 }, { - "epoch": 0.72, - "grad_norm": 2.885763329620718, - "learning_rate": 1.965270893446871e-06, - "loss": 0.6607, + "epoch": 0.48, + "grad_norm": 1.9861679514805537, + "learning_rate": 5.515632186235338e-06, + "loss": 0.5492, "step": 6808 }, { - "epoch": 0.72, - "grad_norm": 2.516314689530151, - "learning_rate": 1.963916638037384e-06, - "loss": 0.6097, + "epoch": 0.48, + "grad_norm": 1.5193170720818678, + "learning_rate": 5.514489132724949e-06, + "loss": 0.4893, "step": 6809 }, { - "epoch": 0.72, - "grad_norm": 2.819130855149682, - "learning_rate": 1.9625627353512854e-06, - "loss": 0.647, + "epoch": 0.48, + "grad_norm": 1.5584436058190756, + "learning_rate": 5.513346052037501e-06, + "loss": 0.5561, "step": 6810 }, { - "epoch": 0.72, - "grad_norm": 3.1510683550167125, - "learning_rate": 1.9612091855458663e-06, - "loss": 0.5885, + "epoch": 0.48, + "grad_norm": 1.6889309523545417, + "learning_rate": 5.512202944233374e-06, + "loss": 0.5435, "step": 6811 }, { - "epoch": 0.72, - "grad_norm": 2.8987176112644457, - "learning_rate": 1.9598559887783797e-06, - "loss": 0.5884, + "epoch": 0.48, + "grad_norm": 2.642698019431285, + "learning_rate": 5.5110598093729535e-06, + "loss": 0.5387, "step": 6812 }, { - "epoch": 0.72, - "grad_norm": 2.9904600312949907, - "learning_rate": 1.958503145206036e-06, - "loss": 0.5806, + "epoch": 0.48, + "grad_norm": 1.5483372768789536, + "learning_rate": 5.509916647516622e-06, + "loss": 0.517, "step": 6813 }, { - "epoch": 0.72, - "grad_norm": 2.5080351376008374, - "learning_rate": 1.9571506549860065e-06, - "loss": 0.6038, + "epoch": 0.48, + "grad_norm": 1.5550859322787332, + "learning_rate": 5.508773458724765e-06, + "loss": 0.5804, "step": 6814 }, { - "epoch": 0.72, - "grad_norm": 2.939203442577235, - "learning_rate": 1.955798518275418e-06, - "loss": 0.5762, + "epoch": 0.48, + "grad_norm": 1.926270174379352, + "learning_rate": 5.50763024305777e-06, + "loss": 0.5633, "step": 6815 }, { - "epoch": 0.72, - "grad_norm": 2.228977059412211, - "learning_rate": 1.954446735231356e-06, - "loss": 0.5609, + "epoch": 0.48, + "grad_norm": 1.9076136517790978, + "learning_rate": 5.506487000576025e-06, + "loss": 0.4798, "step": 6816 }, { - "epoch": 0.72, - "grad_norm": 2.63612086695577, - "learning_rate": 1.953095306010869e-06, - "loss": 0.5973, + "epoch": 0.48, + "grad_norm": 1.7241485741579365, + "learning_rate": 5.50534373133992e-06, + "loss": 0.4936, "step": 6817 }, { - "epoch": 0.72, - "grad_norm": 2.463307921855391, - "learning_rate": 1.9517442307709626e-06, - "loss": 0.5971, + "epoch": 0.48, + "grad_norm": 0.7574308787698483, + "learning_rate": 5.504200435409848e-06, + "loss": 0.4542, "step": 6818 }, { - "epoch": 0.72, - "grad_norm": 1.9678254750031574, - "learning_rate": 1.9503935096686004e-06, - "loss": 0.5483, + "epoch": 0.48, + "grad_norm": 2.1523603404380585, + "learning_rate": 5.5030571128462005e-06, + "loss": 0.5958, "step": 6819 }, { - "epoch": 0.72, - "grad_norm": 2.4727076802540133, - "learning_rate": 1.9490431428607027e-06, - "loss": 0.626, + "epoch": 0.48, + "grad_norm": 1.66661416996218, + "learning_rate": 5.501913763709371e-06, + "loss": 0.487, "step": 6820 }, { - "epoch": 0.72, - "grad_norm": 2.0978475149269795, - "learning_rate": 1.947693130504153e-06, - "loss": 0.5705, + "epoch": 0.48, + "grad_norm": 1.9886661330289361, + "learning_rate": 5.500770388059757e-06, + "loss": 0.5591, "step": 6821 }, { - "epoch": 0.72, - "grad_norm": 2.10936758831189, - "learning_rate": 1.9463434727557927e-06, - "loss": 0.6207, + "epoch": 0.48, + "grad_norm": 0.7248487044352029, + "learning_rate": 5.4996269859577535e-06, + "loss": 0.436, "step": 6822 }, { - "epoch": 0.72, - "grad_norm": 2.4647814076539816, - "learning_rate": 1.9449941697724233e-06, - "loss": 0.5815, + "epoch": 0.48, + "grad_norm": 1.727058386164912, + "learning_rate": 5.498483557463761e-06, + "loss": 0.5108, "step": 6823 }, { - "epoch": 0.72, - "grad_norm": 2.2186953732210632, - "learning_rate": 1.943645221710797e-06, - "loss": 0.5623, + "epoch": 0.48, + "grad_norm": 2.164327040374841, + "learning_rate": 5.497340102638176e-06, + "loss": 0.5012, "step": 6824 }, { - "epoch": 0.72, - "grad_norm": 2.3686123006295414, - "learning_rate": 1.942296628727634e-06, - "loss": 0.6202, + "epoch": 0.48, + "grad_norm": 1.8176847394409723, + "learning_rate": 5.496196621541401e-06, + "loss": 0.5828, "step": 6825 }, { - "epoch": 0.72, - "grad_norm": 2.4056025600068973, - "learning_rate": 1.9409483909796096e-06, - "loss": 0.5397, + "epoch": 0.48, + "grad_norm": 1.7208381364463068, + "learning_rate": 5.4950531142338405e-06, + "loss": 0.545, "step": 6826 }, { - "epoch": 0.72, - "grad_norm": 2.843753987586021, - "learning_rate": 1.93960050862336e-06, - "loss": 0.6794, + "epoch": 0.48, + "grad_norm": 2.0650721285234614, + "learning_rate": 5.493909580775897e-06, + "loss": 0.5334, "step": 6827 }, { - "epoch": 0.72, - "grad_norm": 3.0253380845711915, - "learning_rate": 1.9382529818154765e-06, - "loss": 0.6079, + "epoch": 0.48, + "grad_norm": 0.7366063123848869, + "learning_rate": 5.492766021227975e-06, + "loss": 0.4569, "step": 6828 }, { - "epoch": 0.72, - "grad_norm": 2.226590014372156, - "learning_rate": 1.9369058107125094e-06, - "loss": 0.5139, + "epoch": 0.48, + "grad_norm": 1.6014310421100373, + "learning_rate": 5.4916224356504834e-06, + "loss": 0.5205, "step": 6829 }, { - "epoch": 0.72, - "grad_norm": 2.6079029240171265, - "learning_rate": 1.935558995470971e-06, - "loss": 0.5842, + "epoch": 0.48, + "grad_norm": 1.6981644786476937, + "learning_rate": 5.490478824103827e-06, + "loss": 0.5679, "step": 6830 }, { - "epoch": 0.72, - "grad_norm": 2.5961478591051663, - "learning_rate": 1.9342125362473313e-06, - "loss": 0.7159, + "epoch": 0.48, + "grad_norm": 1.528255920609519, + "learning_rate": 5.489335186648419e-06, + "loss": 0.511, "step": 6831 }, { - "epoch": 0.72, - "grad_norm": 4.508735121085522, - "learning_rate": 1.9328664331980175e-06, - "loss": 0.6256, + "epoch": 0.48, + "grad_norm": 0.8342447215443964, + "learning_rate": 5.488191523344667e-06, + "loss": 0.4472, "step": 6832 }, { - "epoch": 0.72, - "grad_norm": 3.0787485825721332, - "learning_rate": 1.931520686479413e-06, - "loss": 0.7014, + "epoch": 0.48, + "grad_norm": 1.6505220972720764, + "learning_rate": 5.487047834252984e-06, + "loss": 0.5732, "step": 6833 }, { - "epoch": 0.72, - "grad_norm": 2.2749815114493286, - "learning_rate": 1.9301752962478646e-06, - "loss": 0.6009, + "epoch": 0.48, + "grad_norm": 1.659439068002309, + "learning_rate": 5.4859041194337856e-06, + "loss": 0.4995, "step": 6834 }, { - "epoch": 0.72, - "grad_norm": 1.9794025506603408, - "learning_rate": 1.9288302626596772e-06, - "loss": 0.6428, + "epoch": 0.49, + "grad_norm": 1.6339104751416502, + "learning_rate": 5.484760378947485e-06, + "loss": 0.5697, "step": 6835 }, { - "epoch": 0.72, - "grad_norm": 5.039751673052599, - "learning_rate": 1.9274855858711157e-06, - "loss": 0.6417, + "epoch": 0.49, + "grad_norm": 2.5667646386805973, + "learning_rate": 5.483616612854499e-06, + "loss": 0.6024, "step": 6836 }, { - "epoch": 0.72, - "grad_norm": 2.8877557291619853, - "learning_rate": 1.9261412660383927e-06, - "loss": 0.6086, + "epoch": 0.49, + "grad_norm": 1.7215080676345558, + "learning_rate": 5.482472821215244e-06, + "loss": 0.5603, "step": 6837 }, { - "epoch": 0.72, - "grad_norm": 2.692848677613801, - "learning_rate": 1.924797303317692e-06, - "loss": 0.6517, + "epoch": 0.49, + "grad_norm": 5.462486626274449, + "learning_rate": 5.4813290040901405e-06, + "loss": 0.6044, "step": 6838 }, { - "epoch": 0.72, - "grad_norm": 2.4817682883772334, - "learning_rate": 1.9234536978651514e-06, - "loss": 0.6697, + "epoch": 0.49, + "grad_norm": 1.6376550408436865, + "learning_rate": 5.480185161539606e-06, + "loss": 0.5236, "step": 6839 }, { - "epoch": 0.72, - "grad_norm": 3.126381340774734, - "learning_rate": 1.922110449836869e-06, - "loss": 0.6342, + "epoch": 0.49, + "grad_norm": 1.821669736021595, + "learning_rate": 5.479041293624065e-06, + "loss": 0.5524, "step": 6840 }, { - "epoch": 0.72, - "grad_norm": 2.565024160486094, - "learning_rate": 1.920767559388896e-06, - "loss": 0.629, + "epoch": 0.49, + "grad_norm": 1.5989907233963154, + "learning_rate": 5.477897400403941e-06, + "loss": 0.582, "step": 6841 }, { - "epoch": 0.72, - "grad_norm": 2.208434714049697, - "learning_rate": 1.919425026677246e-06, - "loss": 0.6768, + "epoch": 0.49, + "grad_norm": 1.6768940150292562, + "learning_rate": 5.476753481939656e-06, + "loss": 0.5528, "step": 6842 }, { - "epoch": 0.72, - "grad_norm": 2.7771674907930843, - "learning_rate": 1.9180828518578907e-06, - "loss": 0.6414, + "epoch": 0.49, + "grad_norm": 1.6500073827380295, + "learning_rate": 5.475609538291637e-06, + "loss": 0.5017, "step": 6843 }, { - "epoch": 0.72, - "grad_norm": 2.972741578017247, - "learning_rate": 1.9167410350867634e-06, - "loss": 0.5638, + "epoch": 0.49, + "grad_norm": 1.715070039505323, + "learning_rate": 5.474465569520311e-06, + "loss": 0.5993, "step": 6844 }, { - "epoch": 0.72, - "grad_norm": 2.408080059628293, - "learning_rate": 1.9153995765197492e-06, - "loss": 0.5538, + "epoch": 0.49, + "grad_norm": 1.8973153109474683, + "learning_rate": 5.473321575686105e-06, + "loss": 0.5758, "step": 6845 }, { - "epoch": 0.72, - "grad_norm": 2.463378183778841, - "learning_rate": 1.9140584763126942e-06, - "loss": 0.6613, + "epoch": 0.49, + "grad_norm": 2.042830409711636, + "learning_rate": 5.47217755684945e-06, + "loss": 0.5395, "step": 6846 }, { - "epoch": 0.72, - "grad_norm": 3.6438629469274177, - "learning_rate": 1.912717734621404e-06, - "loss": 0.6311, + "epoch": 0.49, + "grad_norm": 1.6009687100485974, + "learning_rate": 5.471033513070776e-06, + "loss": 0.5476, "step": 6847 }, { - "epoch": 0.72, - "grad_norm": 2.4515690885324446, - "learning_rate": 1.911377351601644e-06, - "loss": 0.587, + "epoch": 0.49, + "grad_norm": 1.830977712380756, + "learning_rate": 5.4698894444105155e-06, + "loss": 0.4873, "step": 6848 }, { - "epoch": 0.72, - "grad_norm": 2.9571991246007077, - "learning_rate": 1.910037327409136e-06, - "loss": 0.5853, + "epoch": 0.49, + "grad_norm": 1.8412166568024568, + "learning_rate": 5.468745350929103e-06, + "loss": 0.634, "step": 6849 }, { - "epoch": 0.72, - "grad_norm": 1.0579456833316514, - "learning_rate": 1.9086976621995595e-06, - "loss": 0.5346, + "epoch": 0.49, + "grad_norm": 1.7589456382081645, + "learning_rate": 5.467601232686972e-06, + "loss": 0.5713, "step": 6850 }, { - "epoch": 0.72, - "grad_norm": 2.360162048127551, - "learning_rate": 1.9073583561285507e-06, - "loss": 0.5493, + "epoch": 0.49, + "grad_norm": 1.447611218930729, + "learning_rate": 5.466457089744561e-06, + "loss": 0.5307, "step": 6851 }, { - "epoch": 0.72, - "grad_norm": 2.9782767148226186, - "learning_rate": 1.9060194093517082e-06, - "loss": 0.6485, + "epoch": 0.49, + "grad_norm": 1.7536273806914817, + "learning_rate": 5.465312922162304e-06, + "loss": 0.524, "step": 6852 }, { - "epoch": 0.72, - "grad_norm": 3.172949041709697, - "learning_rate": 1.9046808220245888e-06, - "loss": 0.6456, + "epoch": 0.49, + "grad_norm": 1.9519312569900091, + "learning_rate": 5.4641687300006454e-06, + "loss": 0.5234, "step": 6853 }, { - "epoch": 0.72, - "grad_norm": 2.4126025762580485, - "learning_rate": 1.903342594302704e-06, - "loss": 0.6233, + "epoch": 0.49, + "grad_norm": 1.7592312729176955, + "learning_rate": 5.463024513320018e-06, + "loss": 0.6085, "step": 6854 }, { - "epoch": 0.72, - "grad_norm": 3.111218794053248, - "learning_rate": 1.9020047263415226e-06, - "loss": 0.564, + "epoch": 0.49, + "grad_norm": 1.5173810980812892, + "learning_rate": 5.4618802721808676e-06, + "loss": 0.5343, "step": 6855 }, { - "epoch": 0.72, - "grad_norm": 2.112267233952676, - "learning_rate": 1.9006672182964776e-06, - "loss": 0.5294, + "epoch": 0.49, + "grad_norm": 4.6571551068423975, + "learning_rate": 5.460736006643636e-06, + "loss": 0.5115, "step": 6856 }, { - "epoch": 0.72, - "grad_norm": 2.1191102063033305, - "learning_rate": 1.899330070322955e-06, - "loss": 0.5833, + "epoch": 0.49, + "grad_norm": 2.457131983281772, + "learning_rate": 5.459591716768768e-06, + "loss": 0.5166, "step": 6857 }, { - "epoch": 0.72, - "grad_norm": 2.8084622563861643, - "learning_rate": 1.8979932825763058e-06, - "loss": 0.6078, + "epoch": 0.49, + "grad_norm": 1.6561383961904648, + "learning_rate": 5.4584474026167085e-06, + "loss": 0.6072, "step": 6858 }, { - "epoch": 0.72, - "grad_norm": 2.731001054966648, - "learning_rate": 1.8966568552118265e-06, - "loss": 0.6116, + "epoch": 0.49, + "grad_norm": 0.7650884481059345, + "learning_rate": 5.457303064247904e-06, + "loss": 0.4494, "step": 6859 }, { - "epoch": 0.72, - "grad_norm": 2.4118252942652885, - "learning_rate": 1.895320788384783e-06, - "loss": 0.6477, + "epoch": 0.49, + "grad_norm": 1.947627716336281, + "learning_rate": 5.4561587017228016e-06, + "loss": 0.5563, "step": 6860 }, { - "epoch": 0.72, - "grad_norm": 2.4880736901897076, - "learning_rate": 1.8939850822503953e-06, - "loss": 0.6904, + "epoch": 0.49, + "grad_norm": 1.3721809392583832, + "learning_rate": 5.45501431510185e-06, + "loss": 0.5063, "step": 6861 }, { - "epoch": 0.72, - "grad_norm": 2.2508815431544282, - "learning_rate": 1.8926497369638435e-06, - "loss": 0.5529, + "epoch": 0.49, + "grad_norm": 1.7471197481097551, + "learning_rate": 5.4538699044455e-06, + "loss": 0.5318, "step": 6862 }, { - "epoch": 0.72, - "grad_norm": 2.475826447334696, - "learning_rate": 1.8913147526802633e-06, - "loss": 0.5821, + "epoch": 0.49, + "grad_norm": 1.6566591645074071, + "learning_rate": 5.4527254698142065e-06, + "loss": 0.5491, "step": 6863 }, { - "epoch": 0.72, - "grad_norm": 2.106833308302653, - "learning_rate": 1.8899801295547476e-06, - "loss": 0.6153, + "epoch": 0.49, + "grad_norm": 3.0620813763872676, + "learning_rate": 5.451581011268417e-06, + "loss": 0.5232, "step": 6864 }, { - "epoch": 0.72, - "grad_norm": 2.202917393863976, - "learning_rate": 1.8886458677423497e-06, - "loss": 0.5526, + "epoch": 0.49, + "grad_norm": 1.8305095614138014, + "learning_rate": 5.450436528868589e-06, + "loss": 0.5455, "step": 6865 }, { - "epoch": 0.72, - "grad_norm": 2.4715070549141216, - "learning_rate": 1.8873119673980828e-06, - "loss": 0.6657, + "epoch": 0.49, + "grad_norm": 2.5076124820560914, + "learning_rate": 5.449292022675179e-06, + "loss": 0.5508, "step": 6866 }, { - "epoch": 0.72, - "grad_norm": 2.967796086555414, - "learning_rate": 1.8859784286769133e-06, - "loss": 0.5738, + "epoch": 0.49, + "grad_norm": 2.162939706461156, + "learning_rate": 5.44814749274864e-06, + "loss": 0.5263, "step": 6867 }, { - "epoch": 0.72, - "grad_norm": 2.265639351961703, - "learning_rate": 1.8846452517337665e-06, - "loss": 0.676, + "epoch": 0.49, + "grad_norm": 1.6493360779913957, + "learning_rate": 5.447002939149433e-06, + "loss": 0.5416, "step": 6868 }, { - "epoch": 0.72, - "grad_norm": 2.2231883474524, - "learning_rate": 1.8833124367235294e-06, - "loss": 0.6419, + "epoch": 0.49, + "grad_norm": 1.6487864870071969, + "learning_rate": 5.445858361938014e-06, + "loss": 0.5987, "step": 6869 }, { - "epoch": 0.72, - "grad_norm": 3.0688124326659016, - "learning_rate": 1.8819799838010434e-06, - "loss": 0.617, + "epoch": 0.49, + "grad_norm": 1.6697345747372552, + "learning_rate": 5.444713761174848e-06, + "loss": 0.5225, "step": 6870 }, { - "epoch": 0.72, - "grad_norm": 2.5790235530152508, - "learning_rate": 1.8806478931211137e-06, - "loss": 0.6266, + "epoch": 0.49, + "grad_norm": 1.592110884496427, + "learning_rate": 5.443569136920393e-06, + "loss": 0.583, "step": 6871 }, { - "epoch": 0.72, - "grad_norm": 2.248003195138114, - "learning_rate": 1.8793161648384905e-06, - "loss": 0.6454, + "epoch": 0.49, + "grad_norm": 1.767971354260779, + "learning_rate": 5.442424489235114e-06, + "loss": 0.5202, "step": 6872 }, { - "epoch": 0.72, - "grad_norm": 2.4974510096976377, - "learning_rate": 1.8779847991078943e-06, - "loss": 0.6192, + "epoch": 0.49, + "grad_norm": 1.7578434505895417, + "learning_rate": 5.441279818179474e-06, + "loss": 0.5255, "step": 6873 }, { - "epoch": 0.72, - "grad_norm": 2.7581329611825183, - "learning_rate": 1.8766537960839997e-06, - "loss": 0.6176, + "epoch": 0.49, + "grad_norm": 1.9573114752347067, + "learning_rate": 5.440135123813939e-06, + "loss": 0.5475, "step": 6874 }, { - "epoch": 0.72, - "grad_norm": 3.5694350624696978, - "learning_rate": 1.8753231559214402e-06, - "loss": 0.4594, + "epoch": 0.49, + "grad_norm": 1.8926520693847737, + "learning_rate": 5.438990406198975e-06, + "loss": 0.5728, "step": 6875 }, { - "epoch": 0.72, - "grad_norm": 4.608950453790209, - "learning_rate": 1.8739928787748035e-06, - "loss": 0.5928, + "epoch": 0.49, + "grad_norm": 1.5583370220109374, + "learning_rate": 5.437845665395049e-06, + "loss": 0.5592, "step": 6876 }, { - "epoch": 0.72, - "grad_norm": 2.277145432538584, - "learning_rate": 1.872662964798636e-06, - "loss": 0.6286, + "epoch": 0.49, + "grad_norm": 2.0122615193463664, + "learning_rate": 5.436700901462633e-06, + "loss": 0.5273, "step": 6877 }, { - "epoch": 0.72, - "grad_norm": 2.794994081731649, - "learning_rate": 1.8713334141474454e-06, - "loss": 0.6851, + "epoch": 0.49, + "grad_norm": 2.758672493056485, + "learning_rate": 5.435556114462196e-06, + "loss": 0.5399, "step": 6878 }, { - "epoch": 0.72, - "grad_norm": 2.249886037254742, - "learning_rate": 1.8700042269756964e-06, - "loss": 0.633, + "epoch": 0.49, + "grad_norm": 1.6195408737847485, + "learning_rate": 5.434411304454208e-06, + "loss": 0.5331, "step": 6879 }, { - "epoch": 0.72, - "grad_norm": 2.2734533778197794, - "learning_rate": 1.8686754034378085e-06, - "loss": 0.6386, + "epoch": 0.49, + "grad_norm": 2.0871695865625672, + "learning_rate": 5.433266471499143e-06, + "loss": 0.536, "step": 6880 }, { - "epoch": 0.72, - "grad_norm": 2.128369477606803, - "learning_rate": 1.867346943688158e-06, - "loss": 0.6784, + "epoch": 0.49, + "grad_norm": 1.5431312928196177, + "learning_rate": 5.432121615657475e-06, + "loss": 0.5628, "step": 6881 }, { - "epoch": 0.72, - "grad_norm": 2.304439961783062, - "learning_rate": 1.8660188478810848e-06, - "loss": 0.5845, + "epoch": 0.49, + "grad_norm": 1.5801245171687117, + "learning_rate": 5.43097673698968e-06, + "loss": 0.5787, "step": 6882 }, { - "epoch": 0.72, - "grad_norm": 2.995582630490625, - "learning_rate": 1.8646911161708824e-06, - "loss": 0.6216, + "epoch": 0.49, + "grad_norm": 1.5992236585895854, + "learning_rate": 5.429831835556232e-06, + "loss": 0.516, "step": 6883 }, { - "epoch": 0.72, - "grad_norm": 2.3675530017194064, - "learning_rate": 1.8633637487118046e-06, - "loss": 0.5987, + "epoch": 0.49, + "grad_norm": 2.648962780394367, + "learning_rate": 5.42868691141761e-06, + "loss": 0.4792, "step": 6884 }, { - "epoch": 0.72, - "grad_norm": 2.9813523750802173, - "learning_rate": 1.862036745658059e-06, - "loss": 0.6377, + "epoch": 0.49, + "grad_norm": 1.9197147505800205, + "learning_rate": 5.427541964634294e-06, + "loss": 0.6095, "step": 6885 }, { - "epoch": 0.72, - "grad_norm": 2.6903692135653428, - "learning_rate": 1.8607101071638117e-06, - "loss": 0.6237, + "epoch": 0.49, + "grad_norm": 0.8053990026672082, + "learning_rate": 5.426396995266762e-06, + "loss": 0.4491, "step": 6886 }, { - "epoch": 0.72, - "grad_norm": 2.329812249626914, - "learning_rate": 1.8593838333831893e-06, - "loss": 0.6296, + "epoch": 0.49, + "grad_norm": 1.7859580506920931, + "learning_rate": 5.4252520033754965e-06, + "loss": 0.5691, "step": 6887 }, { - "epoch": 0.72, - "grad_norm": 3.501685681461769, - "learning_rate": 1.8580579244702762e-06, - "loss": 0.6258, + "epoch": 0.49, + "grad_norm": 1.611286187787717, + "learning_rate": 5.424106989020979e-06, + "loss": 0.5508, "step": 6888 }, { - "epoch": 0.72, - "grad_norm": 1.055180231542555, - "learning_rate": 1.8567323805791116e-06, - "loss": 0.539, + "epoch": 0.49, + "grad_norm": 1.7646495895167915, + "learning_rate": 5.422961952263692e-06, + "loss": 0.5216, "step": 6889 }, { - "epoch": 0.73, - "grad_norm": 2.6544723103678916, - "learning_rate": 1.8554072018636903e-06, - "loss": 0.5934, + "epoch": 0.49, + "grad_norm": 1.7498355138132562, + "learning_rate": 5.4218168931641225e-06, + "loss": 0.603, "step": 6890 }, { - "epoch": 0.73, - "grad_norm": 2.4845123889284735, - "learning_rate": 1.8540823884779708e-06, - "loss": 0.5886, + "epoch": 0.49, + "grad_norm": 2.7666032776133855, + "learning_rate": 5.420671811782755e-06, + "loss": 0.5474, "step": 6891 }, { - "epoch": 0.73, - "grad_norm": 3.1419531063459485, - "learning_rate": 1.8527579405758672e-06, - "loss": 0.5611, + "epoch": 0.49, + "grad_norm": 1.600096874996729, + "learning_rate": 5.419526708180077e-06, + "loss": 0.4791, "step": 6892 }, { - "epoch": 0.73, - "grad_norm": 2.4840964920780957, - "learning_rate": 1.851433858311248e-06, - "loss": 0.5448, + "epoch": 0.49, + "grad_norm": 1.7157826762501347, + "learning_rate": 5.4183815824165776e-06, + "loss": 0.4517, "step": 6893 }, { - "epoch": 0.73, - "grad_norm": 2.7386535448522418, - "learning_rate": 1.8501101418379398e-06, - "loss": 0.6005, + "epoch": 0.49, + "grad_norm": 2.0094733372185503, + "learning_rate": 5.417236434552745e-06, + "loss": 0.5307, "step": 6894 }, { - "epoch": 0.73, - "grad_norm": 2.879316470041651, - "learning_rate": 1.8487867913097301e-06, - "loss": 0.5817, + "epoch": 0.49, + "grad_norm": 1.5304113876381422, + "learning_rate": 5.4160912646490705e-06, + "loss": 0.5932, "step": 6895 }, { - "epoch": 0.73, - "grad_norm": 1.0545439469322904, - "learning_rate": 1.8474638068803612e-06, - "loss": 0.5266, + "epoch": 0.49, + "grad_norm": 0.6840037188705317, + "learning_rate": 5.414946072766044e-06, + "loss": 0.4664, "step": 6896 }, { - "epoch": 0.73, - "grad_norm": 3.215830096903211, - "learning_rate": 1.8461411887035368e-06, - "loss": 0.5681, + "epoch": 0.49, + "grad_norm": 1.5814451875383069, + "learning_rate": 5.413800858964161e-06, + "loss": 0.5096, "step": 6897 }, { - "epoch": 0.73, - "grad_norm": 2.3679216916484243, - "learning_rate": 1.8448189369329117e-06, - "loss": 0.6838, + "epoch": 0.49, + "grad_norm": 1.4589791692976493, + "learning_rate": 5.412655623303914e-06, + "loss": 0.5069, "step": 6898 }, { - "epoch": 0.73, - "grad_norm": 2.423382212663329, - "learning_rate": 1.8434970517221e-06, - "loss": 0.6508, + "epoch": 0.49, + "grad_norm": 2.083948739648997, + "learning_rate": 5.411510365845798e-06, + "loss": 0.4932, "step": 6899 }, { - "epoch": 0.73, - "grad_norm": 1.0003477907669218, - "learning_rate": 1.8421755332246765e-06, - "loss": 0.5632, + "epoch": 0.49, + "grad_norm": 1.9653425779741514, + "learning_rate": 5.410365086650312e-06, + "loss": 0.4712, "step": 6900 }, { - "epoch": 0.73, - "grad_norm": 2.3039328325659962, - "learning_rate": 1.840854381594173e-06, - "loss": 0.5946, + "epoch": 0.49, + "grad_norm": 1.575567873552415, + "learning_rate": 5.4092197857779505e-06, + "loss": 0.5109, "step": 6901 }, { - "epoch": 0.73, - "grad_norm": 2.9075536999442937, - "learning_rate": 1.8395335969840749e-06, - "loss": 0.6977, + "epoch": 0.49, + "grad_norm": 1.6798076731078717, + "learning_rate": 5.4080744632892145e-06, + "loss": 0.4904, "step": 6902 }, { - "epoch": 0.73, - "grad_norm": 2.653021560915067, - "learning_rate": 1.8382131795478265e-06, - "loss": 0.6104, + "epoch": 0.49, + "grad_norm": 1.448890322719478, + "learning_rate": 5.4069291192446e-06, + "loss": 0.5176, "step": 6903 }, { - "epoch": 0.73, - "grad_norm": 2.459736159668085, - "learning_rate": 1.8368931294388303e-06, - "loss": 0.6558, + "epoch": 0.49, + "grad_norm": 3.0668341957247187, + "learning_rate": 5.4057837537046135e-06, + "loss": 0.5119, "step": 6904 }, { - "epoch": 0.73, - "grad_norm": 2.145122811581977, - "learning_rate": 1.8355734468104476e-06, - "loss": 0.6376, + "epoch": 0.49, + "grad_norm": 1.7415097783732243, + "learning_rate": 5.404638366729753e-06, + "loss": 0.5778, "step": 6905 }, { - "epoch": 0.73, - "grad_norm": 2.3194721091248622, - "learning_rate": 1.8342541318159967e-06, - "loss": 0.6556, + "epoch": 0.49, + "grad_norm": 1.5247562201584641, + "learning_rate": 5.403492958380522e-06, + "loss": 0.5043, "step": 6906 }, { - "epoch": 0.73, - "grad_norm": 2.380316773328887, - "learning_rate": 1.8329351846087467e-06, - "loss": 0.4877, + "epoch": 0.49, + "grad_norm": 2.073529504951222, + "learning_rate": 5.4023475287174254e-06, + "loss": 0.5341, "step": 6907 }, { - "epoch": 0.73, - "grad_norm": 2.059952338938169, - "learning_rate": 1.8316166053419321e-06, - "loss": 0.648, + "epoch": 0.49, + "grad_norm": 0.8416959864542389, + "learning_rate": 5.401202077800971e-06, + "loss": 0.4315, "step": 6908 }, { - "epoch": 0.73, - "grad_norm": 2.3302793061982854, - "learning_rate": 1.8302983941687414e-06, - "loss": 0.665, + "epoch": 0.49, + "grad_norm": 1.9058821719808015, + "learning_rate": 5.400056605691663e-06, + "loss": 0.6106, "step": 6909 }, { - "epoch": 0.73, - "grad_norm": 2.196962381204859, - "learning_rate": 1.828980551242322e-06, - "loss": 0.6074, + "epoch": 0.49, + "grad_norm": 1.5611663609160082, + "learning_rate": 5.398911112450008e-06, + "loss": 0.5063, "step": 6910 }, { - "epoch": 0.73, - "grad_norm": 2.3653436980498133, - "learning_rate": 1.827663076715776e-06, - "loss": 0.5736, + "epoch": 0.49, + "grad_norm": 1.730549211380824, + "learning_rate": 5.397765598136517e-06, + "loss": 0.4681, "step": 6911 }, { - "epoch": 0.73, - "grad_norm": 2.3272350514303515, - "learning_rate": 1.8263459707421617e-06, - "loss": 0.5789, + "epoch": 0.49, + "grad_norm": 1.6984645208182654, + "learning_rate": 5.396620062811699e-06, + "loss": 0.4907, "step": 6912 }, { - "epoch": 0.73, - "grad_norm": 2.843158913797961, - "learning_rate": 1.8250292334744979e-06, - "loss": 0.6096, + "epoch": 0.49, + "grad_norm": 1.5431452525835356, + "learning_rate": 5.395474506536066e-06, + "loss": 0.539, "step": 6913 }, { - "epoch": 0.73, - "grad_norm": 2.3445312077529357, - "learning_rate": 1.8237128650657621e-06, - "loss": 0.6748, + "epoch": 0.49, + "grad_norm": 1.5398478459980431, + "learning_rate": 5.394328929370129e-06, + "loss": 0.5618, "step": 6914 }, { - "epoch": 0.73, - "grad_norm": 2.7392105169159904, - "learning_rate": 1.8223968656688834e-06, - "loss": 0.5316, + "epoch": 0.49, + "grad_norm": 1.8377864968904398, + "learning_rate": 5.393183331374403e-06, + "loss": 0.5359, "step": 6915 }, { - "epoch": 0.73, - "grad_norm": 2.381012515570709, - "learning_rate": 1.8210812354367501e-06, - "loss": 0.6245, + "epoch": 0.49, + "grad_norm": 2.238976006890208, + "learning_rate": 5.3920377126094e-06, + "loss": 0.4916, "step": 6916 }, { - "epoch": 0.73, - "grad_norm": 2.0098706319158968, - "learning_rate": 1.8197659745222095e-06, - "loss": 0.5341, + "epoch": 0.49, + "grad_norm": 1.5977911626279486, + "learning_rate": 5.390892073135637e-06, + "loss": 0.5355, "step": 6917 }, { - "epoch": 0.73, - "grad_norm": 2.3320907079732485, - "learning_rate": 1.818451083078065e-06, - "loss": 0.6104, + "epoch": 0.49, + "grad_norm": 1.938305735845428, + "learning_rate": 5.38974641301363e-06, + "loss": 0.5208, "step": 6918 }, { - "epoch": 0.73, - "grad_norm": 3.243267962515471, - "learning_rate": 1.817136561257078e-06, - "loss": 0.5681, + "epoch": 0.49, + "grad_norm": 1.6591006554501064, + "learning_rate": 5.388600732303898e-06, + "loss": 0.5971, "step": 6919 }, { - "epoch": 0.73, - "grad_norm": 3.244715320509986, - "learning_rate": 1.8158224092119648e-06, - "loss": 0.5684, + "epoch": 0.49, + "grad_norm": 1.6328976094888825, + "learning_rate": 5.387455031066957e-06, + "loss": 0.536, "step": 6920 }, { - "epoch": 0.73, - "grad_norm": 2.9394817301430924, - "learning_rate": 1.8145086270953977e-06, - "loss": 0.576, + "epoch": 0.49, + "grad_norm": 1.7192728937822588, + "learning_rate": 5.386309309363329e-06, + "loss": 0.5582, "step": 6921 }, { - "epoch": 0.73, - "grad_norm": 2.421731912227469, - "learning_rate": 1.8131952150600101e-06, - "loss": 0.573, + "epoch": 0.49, + "grad_norm": 1.8797868115752492, + "learning_rate": 5.385163567253533e-06, + "loss": 0.6046, "step": 6922 }, { - "epoch": 0.73, - "grad_norm": 2.7932028278457115, - "learning_rate": 1.8118821732583918e-06, - "loss": 0.5806, + "epoch": 0.49, + "grad_norm": 3.578515140377028, + "learning_rate": 5.384017804798094e-06, + "loss": 0.5202, "step": 6923 }, { - "epoch": 0.73, - "grad_norm": 2.9079436194870394, - "learning_rate": 1.8105695018430873e-06, - "loss": 0.6072, + "epoch": 0.49, + "grad_norm": 0.7601036073124097, + "learning_rate": 5.382872022057532e-06, + "loss": 0.4189, "step": 6924 }, { - "epoch": 0.73, - "grad_norm": 2.583809450277425, - "learning_rate": 1.8092572009665965e-06, - "loss": 0.628, + "epoch": 0.49, + "grad_norm": 2.115908610753962, + "learning_rate": 5.381726219092375e-06, + "loss": 0.4946, "step": 6925 }, { - "epoch": 0.73, - "grad_norm": 2.1674044267667516, - "learning_rate": 1.80794527078138e-06, - "loss": 0.5968, + "epoch": 0.49, + "grad_norm": 2.5270353344066674, + "learning_rate": 5.380580395963143e-06, + "loss": 0.5686, "step": 6926 }, { - "epoch": 0.73, - "grad_norm": 2.6185013262869354, - "learning_rate": 1.8066337114398568e-06, - "loss": 0.5478, + "epoch": 0.49, + "grad_norm": 1.7546215933493545, + "learning_rate": 5.379434552730365e-06, + "loss": 0.5448, "step": 6927 }, { - "epoch": 0.73, - "grad_norm": 3.158397579484625, - "learning_rate": 1.8053225230943982e-06, - "loss": 0.6001, + "epoch": 0.49, + "grad_norm": 1.8447637322224328, + "learning_rate": 5.378288689454569e-06, + "loss": 0.4894, "step": 6928 }, { - "epoch": 0.73, - "grad_norm": 3.264667477188976, - "learning_rate": 1.8040117058973317e-06, - "loss": 0.6074, + "epoch": 0.49, + "grad_norm": 1.9147275564700281, + "learning_rate": 5.377142806196282e-06, + "loss": 0.5486, "step": 6929 }, { - "epoch": 0.73, - "grad_norm": 2.7602307379116064, - "learning_rate": 1.802701260000947e-06, - "loss": 0.6428, + "epoch": 0.49, + "grad_norm": 1.62302999259758, + "learning_rate": 5.375996903016035e-06, + "loss": 0.5257, "step": 6930 }, { - "epoch": 0.73, - "grad_norm": 2.986327201725227, - "learning_rate": 1.8013911855574874e-06, - "loss": 0.5278, + "epoch": 0.49, + "grad_norm": 1.9055137815076353, + "learning_rate": 5.374850979974357e-06, + "loss": 0.5413, "step": 6931 }, { - "epoch": 0.73, - "grad_norm": 2.0385916995999906, - "learning_rate": 1.8000814827191548e-06, - "loss": 0.6268, + "epoch": 0.49, + "grad_norm": 0.7897735523760647, + "learning_rate": 5.373705037131781e-06, + "loss": 0.4247, "step": 6932 }, { - "epoch": 0.73, - "grad_norm": 2.1047064832640023, - "learning_rate": 1.7987721516381056e-06, - "loss": 0.6004, + "epoch": 0.49, + "grad_norm": 1.7704118802747413, + "learning_rate": 5.372559074548838e-06, + "loss": 0.5164, "step": 6933 }, { - "epoch": 0.73, - "grad_norm": 2.4994668971667364, - "learning_rate": 1.7974631924664533e-06, - "loss": 0.6405, + "epoch": 0.49, + "grad_norm": 1.5458396244025223, + "learning_rate": 5.371413092286062e-06, + "loss": 0.5391, "step": 6934 }, { - "epoch": 0.73, - "grad_norm": 2.388551949225777, - "learning_rate": 1.7961546053562684e-06, - "loss": 0.647, + "epoch": 0.49, + "grad_norm": 1.9643875914658357, + "learning_rate": 5.370267090403986e-06, + "loss": 0.5396, "step": 6935 }, { - "epoch": 0.73, - "grad_norm": 2.348701907039752, - "learning_rate": 1.7948463904595826e-06, - "loss": 0.5663, + "epoch": 0.49, + "grad_norm": 1.4523912930073641, + "learning_rate": 5.369121068963152e-06, + "loss": 0.5678, "step": 6936 }, { - "epoch": 0.73, - "grad_norm": 2.3266351864310137, - "learning_rate": 1.793538547928378e-06, - "loss": 0.604, + "epoch": 0.49, + "grad_norm": 0.7369652054910906, + "learning_rate": 5.367975028024089e-06, + "loss": 0.471, "step": 6937 }, { - "epoch": 0.73, - "grad_norm": 2.150907502807278, - "learning_rate": 1.7922310779145941e-06, - "loss": 0.5916, + "epoch": 0.49, + "grad_norm": 1.7829216213222345, + "learning_rate": 5.3668289676473404e-06, + "loss": 0.5707, "step": 6938 }, { - "epoch": 0.73, - "grad_norm": 3.2575410386870502, - "learning_rate": 1.7909239805701307e-06, - "loss": 0.6358, + "epoch": 0.49, + "grad_norm": 1.72150076223606, + "learning_rate": 5.365682887893441e-06, + "loss": 0.5586, "step": 6939 }, { - "epoch": 0.73, - "grad_norm": 1.0203290093238349, - "learning_rate": 1.7896172560468427e-06, - "loss": 0.5392, + "epoch": 0.49, + "grad_norm": 1.506106403016577, + "learning_rate": 5.364536788822934e-06, + "loss": 0.5807, "step": 6940 }, { - "epoch": 0.73, - "grad_norm": 2.455935523599585, - "learning_rate": 1.7883109044965452e-06, - "loss": 0.6663, + "epoch": 0.49, + "grad_norm": 1.5810194048234243, + "learning_rate": 5.363390670496357e-06, + "loss": 0.4996, "step": 6941 }, { - "epoch": 0.73, - "grad_norm": 2.292636831216748, - "learning_rate": 1.7870049260709992e-06, - "loss": 0.6031, + "epoch": 0.49, + "grad_norm": 1.6262793391049462, + "learning_rate": 5.362244532974253e-06, + "loss": 0.5879, "step": 6942 }, { - "epoch": 0.73, - "grad_norm": 2.740746619986824, - "learning_rate": 1.785699320921933e-06, - "loss": 0.5899, + "epoch": 0.49, + "grad_norm": 1.6624331046162002, + "learning_rate": 5.361098376317167e-06, + "loss": 0.5577, "step": 6943 }, { - "epoch": 0.73, - "grad_norm": 4.438088048263127, - "learning_rate": 1.784394089201028e-06, - "loss": 0.609, + "epoch": 0.49, + "grad_norm": 1.6700052862033892, + "learning_rate": 5.359952200585641e-06, + "loss": 0.5008, "step": 6944 }, { - "epoch": 0.73, - "grad_norm": 3.2919583772455385, - "learning_rate": 1.7830892310599245e-06, - "loss": 0.5254, + "epoch": 0.49, + "grad_norm": 1.5843493764891567, + "learning_rate": 5.358806005840219e-06, + "loss": 0.4716, "step": 6945 }, { - "epoch": 0.73, - "grad_norm": 3.0375360511034284, - "learning_rate": 1.7817847466502146e-06, - "loss": 0.5886, + "epoch": 0.49, + "grad_norm": 1.9403387458442525, + "learning_rate": 5.357659792141447e-06, + "loss": 0.473, "step": 6946 }, { - "epoch": 0.73, - "grad_norm": 2.278371165535222, - "learning_rate": 1.780480636123449e-06, - "loss": 0.6285, + "epoch": 0.49, + "grad_norm": 1.6359257822521884, + "learning_rate": 5.356513559549876e-06, + "loss": 0.5593, "step": 6947 }, { - "epoch": 0.73, - "grad_norm": 2.6474986848344844, - "learning_rate": 1.7791768996311355e-06, - "loss": 0.6188, + "epoch": 0.49, + "grad_norm": 0.7310183427999907, + "learning_rate": 5.3553673081260495e-06, + "loss": 0.4857, "step": 6948 }, { - "epoch": 0.73, - "grad_norm": 4.053270599492303, - "learning_rate": 1.7778735373247414e-06, - "loss": 0.6231, + "epoch": 0.49, + "grad_norm": 0.6825380568381457, + "learning_rate": 5.354221037930516e-06, + "loss": 0.4409, "step": 6949 }, { - "epoch": 0.73, - "grad_norm": 2.6023055630386622, - "learning_rate": 1.7765705493556857e-06, - "loss": 0.5683, + "epoch": 0.49, + "grad_norm": 1.8221171035252843, + "learning_rate": 5.353074749023829e-06, + "loss": 0.5326, "step": 6950 }, { - "epoch": 0.73, - "grad_norm": 2.180205593322918, - "learning_rate": 1.7752679358753433e-06, - "loss": 0.6944, + "epoch": 0.49, + "grad_norm": 0.8211126857606454, + "learning_rate": 5.351928441466537e-06, + "loss": 0.4511, "step": 6951 }, { - "epoch": 0.73, - "grad_norm": 3.3913259162961666, - "learning_rate": 1.7739656970350505e-06, - "loss": 0.6438, + "epoch": 0.49, + "grad_norm": 1.846743687017633, + "learning_rate": 5.350782115319192e-06, + "loss": 0.5125, "step": 6952 }, { - "epoch": 0.73, - "grad_norm": 2.504035889002391, - "learning_rate": 1.7726638329860978e-06, - "loss": 0.6856, + "epoch": 0.49, + "grad_norm": 1.4963822913203957, + "learning_rate": 5.349635770642348e-06, + "loss": 0.6567, "step": 6953 }, { - "epoch": 0.73, - "grad_norm": 2.764765266914022, - "learning_rate": 1.7713623438797335e-06, - "loss": 0.5888, + "epoch": 0.49, + "grad_norm": 1.658417654526787, + "learning_rate": 5.348489407496555e-06, + "loss": 0.5823, "step": 6954 }, { - "epoch": 0.73, - "grad_norm": 3.2754835937489934, - "learning_rate": 1.7700612298671587e-06, - "loss": 0.6877, + "epoch": 0.49, + "grad_norm": 1.8715326468117708, + "learning_rate": 5.347343025942374e-06, + "loss": 0.5757, "step": 6955 }, { - "epoch": 0.73, - "grad_norm": 2.449529265194134, - "learning_rate": 1.7687604910995321e-06, - "loss": 0.6649, + "epoch": 0.49, + "grad_norm": 1.8798641197126325, + "learning_rate": 5.346196626040355e-06, + "loss": 0.5657, "step": 6956 }, { - "epoch": 0.73, - "grad_norm": 2.4212481065052907, - "learning_rate": 1.7674601277279707e-06, - "loss": 0.618, + "epoch": 0.49, + "grad_norm": 1.807147006784223, + "learning_rate": 5.345050207851058e-06, + "loss": 0.5331, "step": 6957 }, { - "epoch": 0.73, - "grad_norm": 5.736474279576478, - "learning_rate": 1.7661601399035494e-06, - "loss": 0.5901, + "epoch": 0.49, + "grad_norm": 2.5836215387830497, + "learning_rate": 5.3439037714350395e-06, + "loss": 0.513, "step": 6958 }, { - "epoch": 0.73, - "grad_norm": 2.691986553144693, - "learning_rate": 1.7648605277772945e-06, - "loss": 0.6313, + "epoch": 0.49, + "grad_norm": 1.795980080542423, + "learning_rate": 5.34275731685286e-06, + "loss": 0.5658, "step": 6959 }, { - "epoch": 0.73, - "grad_norm": 2.6214863033679943, - "learning_rate": 1.7635612915001903e-06, - "loss": 0.7215, + "epoch": 0.49, + "grad_norm": 1.5645278913858518, + "learning_rate": 5.341610844165076e-06, + "loss": 0.5101, "step": 6960 }, { - "epoch": 0.73, - "grad_norm": 2.9600672604153533, - "learning_rate": 1.7622624312231795e-06, - "loss": 0.6593, + "epoch": 0.49, + "grad_norm": 1.7721525608869442, + "learning_rate": 5.34046435343225e-06, + "loss": 0.6163, "step": 6961 }, { - "epoch": 0.73, - "grad_norm": 2.8520335479613363, - "learning_rate": 1.7609639470971618e-06, - "loss": 0.5712, + "epoch": 0.49, + "grad_norm": 1.608341325962311, + "learning_rate": 5.339317844714944e-06, + "loss": 0.5233, "step": 6962 }, { - "epoch": 0.73, - "grad_norm": 2.2886996974373814, - "learning_rate": 1.7596658392729897e-06, - "loss": 0.5834, + "epoch": 0.49, + "grad_norm": 1.8005270857735067, + "learning_rate": 5.3381713180737185e-06, + "loss": 0.4769, "step": 6963 }, { - "epoch": 0.73, - "grad_norm": 2.8110720875652033, - "learning_rate": 1.7583681079014713e-06, - "loss": 0.6194, + "epoch": 0.49, + "grad_norm": 1.7246935860918604, + "learning_rate": 5.337024773569138e-06, + "loss": 0.5856, "step": 6964 }, { - "epoch": 0.73, - "grad_norm": 2.229998449327696, - "learning_rate": 1.7570707531333763e-06, - "loss": 0.5953, + "epoch": 0.49, + "grad_norm": 1.523920939325314, + "learning_rate": 5.335878211261767e-06, + "loss": 0.5633, "step": 6965 }, { - "epoch": 0.73, - "grad_norm": 2.6339048670728284, - "learning_rate": 1.7557737751194264e-06, - "loss": 0.6276, + "epoch": 0.49, + "grad_norm": 1.9275749132768, + "learning_rate": 5.3347316312121724e-06, + "loss": 0.5678, "step": 6966 }, { - "epoch": 0.73, - "grad_norm": 3.126572883969871, - "learning_rate": 1.7544771740103034e-06, - "loss": 0.6219, + "epoch": 0.49, + "grad_norm": 2.099006225437616, + "learning_rate": 5.333585033480917e-06, + "loss": 0.5606, "step": 6967 }, { - "epoch": 0.73, - "grad_norm": 3.532068840298241, - "learning_rate": 1.7531809499566399e-06, - "loss": 0.6469, + "epoch": 0.49, + "grad_norm": 1.5692240072987138, + "learning_rate": 5.332438418128571e-06, + "loss": 0.5046, "step": 6968 }, { - "epoch": 0.73, - "grad_norm": 3.1935164634899, - "learning_rate": 1.7518851031090267e-06, - "loss": 0.612, + "epoch": 0.49, + "grad_norm": 1.5059448418874546, + "learning_rate": 5.331291785215701e-06, + "loss": 0.5257, "step": 6969 }, { - "epoch": 0.73, - "grad_norm": 2.541841612435397, - "learning_rate": 1.7505896336180128e-06, - "loss": 0.5458, + "epoch": 0.49, + "grad_norm": 1.4844021252382882, + "learning_rate": 5.330145134802877e-06, + "loss": 0.5641, "step": 6970 }, { - "epoch": 0.73, - "grad_norm": 2.3737229858880187, - "learning_rate": 1.7492945416341034e-06, - "loss": 0.6253, + "epoch": 0.49, + "grad_norm": 1.377126571367187, + "learning_rate": 5.328998466950667e-06, + "loss": 0.4511, "step": 6971 }, { - "epoch": 0.73, - "grad_norm": 3.2573282697164827, - "learning_rate": 1.7479998273077581e-06, - "loss": 0.633, + "epoch": 0.49, + "grad_norm": 2.182705548652811, + "learning_rate": 5.327851781719643e-06, + "loss": 0.5305, "step": 6972 }, { - "epoch": 0.73, - "grad_norm": 2.7028931132859264, - "learning_rate": 1.74670549078939e-06, - "loss": 0.6755, + "epoch": 0.49, + "grad_norm": 2.016017308696118, + "learning_rate": 5.326705079170378e-06, + "loss": 0.5421, "step": 6973 }, { - "epoch": 0.73, - "grad_norm": 2.702801368704138, - "learning_rate": 1.7454115322293735e-06, - "loss": 0.5617, + "epoch": 0.49, + "grad_norm": 1.6127786779369107, + "learning_rate": 5.325558359363444e-06, + "loss": 0.6353, "step": 6974 }, { - "epoch": 0.73, - "grad_norm": 2.3197807420820613, - "learning_rate": 1.7441179517780376e-06, - "loss": 0.6572, + "epoch": 0.49, + "grad_norm": 1.7838234113694753, + "learning_rate": 5.324411622359413e-06, + "loss": 0.6014, "step": 6975 }, { - "epoch": 0.73, - "grad_norm": 2.674381378155758, - "learning_rate": 1.7428247495856699e-06, - "loss": 0.6158, + "epoch": 0.5, + "grad_norm": 1.781212263683923, + "learning_rate": 5.323264868218863e-06, + "loss": 0.5583, "step": 6976 }, { - "epoch": 0.73, - "grad_norm": 2.8755462439432047, - "learning_rate": 1.7415319258025032e-06, - "loss": 0.697, + "epoch": 0.5, + "grad_norm": 1.535634304680371, + "learning_rate": 5.322118097002364e-06, + "loss": 0.5883, "step": 6977 }, { - "epoch": 0.73, - "grad_norm": 3.0812031503014983, - "learning_rate": 1.7402394805787388e-06, - "loss": 0.6277, + "epoch": 0.5, + "grad_norm": 1.7589008068769565, + "learning_rate": 5.320971308770498e-06, + "loss": 0.5629, "step": 6978 }, { - "epoch": 0.73, - "grad_norm": 11.5464107755622, - "learning_rate": 1.7389474140645279e-06, - "loss": 0.5804, + "epoch": 0.5, + "grad_norm": 2.2869880359850314, + "learning_rate": 5.319824503583839e-06, + "loss": 0.5843, "step": 6979 }, { - "epoch": 0.73, - "grad_norm": 2.338776528858335, - "learning_rate": 1.7376557264099813e-06, - "loss": 0.5742, + "epoch": 0.5, + "grad_norm": 1.4618384765261996, + "learning_rate": 5.318677681502965e-06, + "loss": 0.4972, "step": 6980 }, { - "epoch": 0.73, - "grad_norm": 2.5226545710029087, - "learning_rate": 1.7363644177651623e-06, - "loss": 0.5308, + "epoch": 0.5, + "grad_norm": 0.8511289184310895, + "learning_rate": 5.3175308425884585e-06, + "loss": 0.4465, "step": 6981 }, { - "epoch": 0.73, - "grad_norm": 3.538706474138878, - "learning_rate": 1.7350734882800891e-06, - "loss": 0.6669, + "epoch": 0.5, + "grad_norm": 1.9978626439835512, + "learning_rate": 5.316383986900896e-06, + "loss": 0.5343, "step": 6982 }, { - "epoch": 0.73, - "grad_norm": 2.553740639868528, - "learning_rate": 1.7337829381047405e-06, - "loss": 0.6483, + "epoch": 0.5, + "grad_norm": 1.9544685068137313, + "learning_rate": 5.31523711450086e-06, + "loss": 0.5538, "step": 6983 }, { - "epoch": 0.73, - "grad_norm": 2.4491443310423726, - "learning_rate": 1.7324927673890495e-06, - "loss": 0.5876, + "epoch": 0.5, + "grad_norm": 1.4230179283454532, + "learning_rate": 5.314090225448929e-06, + "loss": 0.5127, "step": 6984 }, { - "epoch": 0.74, - "grad_norm": 2.3165156688725244, - "learning_rate": 1.7312029762829042e-06, - "loss": 0.5532, + "epoch": 0.5, + "grad_norm": 1.8984074403965865, + "learning_rate": 5.31294331980569e-06, + "loss": 0.5408, "step": 6985 }, { - "epoch": 0.74, - "grad_norm": 2.3703050030038884, - "learning_rate": 1.729913564936146e-06, - "loss": 0.646, + "epoch": 0.5, + "grad_norm": 1.4301540354980715, + "learning_rate": 5.311796397631724e-06, + "loss": 0.5447, "step": 6986 }, { - "epoch": 0.74, - "grad_norm": 2.457421486560212, - "learning_rate": 1.728624533498577e-06, - "loss": 0.6851, + "epoch": 0.5, + "grad_norm": 1.88641514203611, + "learning_rate": 5.310649458987616e-06, + "loss": 0.56, "step": 6987 }, { - "epoch": 0.74, - "grad_norm": 3.7152345384682492, - "learning_rate": 1.7273358821199527e-06, - "loss": 0.6291, + "epoch": 0.5, + "grad_norm": 1.7501132083980377, + "learning_rate": 5.3095025039339496e-06, + "loss": 0.5821, "step": 6988 }, { - "epoch": 0.74, - "grad_norm": 2.928945773179569, - "learning_rate": 1.7260476109499885e-06, - "loss": 0.6238, + "epoch": 0.5, + "grad_norm": 3.4952199376762434, + "learning_rate": 5.308355532531314e-06, + "loss": 0.5672, "step": 6989 }, { - "epoch": 0.74, - "grad_norm": 2.0786975252518975, - "learning_rate": 1.7247597201383459e-06, - "loss": 0.6212, + "epoch": 0.5, + "grad_norm": 2.151607462627416, + "learning_rate": 5.307208544840293e-06, + "loss": 0.5207, "step": 6990 }, { - "epoch": 0.74, - "grad_norm": 3.2322246793230462, - "learning_rate": 1.7234722098346512e-06, - "loss": 0.5755, + "epoch": 0.5, + "grad_norm": 2.60827662563997, + "learning_rate": 5.3060615409214776e-06, + "loss": 0.5742, "step": 6991 }, { - "epoch": 0.74, - "grad_norm": 2.385974734862681, - "learning_rate": 1.7221850801884838e-06, - "loss": 0.6729, + "epoch": 0.5, + "grad_norm": 1.7863217674113678, + "learning_rate": 5.304914520835452e-06, + "loss": 0.4835, "step": 6992 }, { - "epoch": 0.74, - "grad_norm": 2.2783917987848175, - "learning_rate": 1.7208983313493804e-06, - "loss": 0.5833, + "epoch": 0.5, + "grad_norm": 1.5563435078014416, + "learning_rate": 5.30376748464281e-06, + "loss": 0.4976, "step": 6993 }, { - "epoch": 0.74, - "grad_norm": 3.1121321239656368, - "learning_rate": 1.7196119634668296e-06, - "loss": 0.6074, + "epoch": 0.5, + "grad_norm": 1.5117521089523513, + "learning_rate": 5.302620432404138e-06, + "loss": 0.5841, "step": 6994 }, { - "epoch": 0.74, - "grad_norm": 2.4956465433518087, - "learning_rate": 1.7183259766902765e-06, - "loss": 0.5979, + "epoch": 0.5, + "grad_norm": 2.0067057900232492, + "learning_rate": 5.301473364180032e-06, + "loss": 0.6124, "step": 6995 }, { - "epoch": 0.74, - "grad_norm": 2.2925119083577363, - "learning_rate": 1.7170403711691252e-06, - "loss": 0.5927, + "epoch": 0.5, + "grad_norm": 1.8426391738329484, + "learning_rate": 5.3003262800310805e-06, + "loss": 0.4833, "step": 6996 }, { - "epoch": 0.74, - "grad_norm": 4.1901529302786145, - "learning_rate": 1.7157551470527356e-06, - "loss": 0.5835, + "epoch": 0.5, + "grad_norm": 1.715258371915016, + "learning_rate": 5.299179180017877e-06, + "loss": 0.5172, "step": 6997 }, { - "epoch": 0.74, - "grad_norm": 2.1103141114003794, - "learning_rate": 1.7144703044904186e-06, - "loss": 0.647, + "epoch": 0.5, + "grad_norm": 1.9591099775091267, + "learning_rate": 5.298032064201016e-06, + "loss": 0.4771, "step": 6998 }, { - "epoch": 0.74, - "grad_norm": 2.2812632686596874, - "learning_rate": 1.7131858436314431e-06, - "loss": 0.5589, + "epoch": 0.5, + "grad_norm": 1.836050398876122, + "learning_rate": 5.296884932641091e-06, + "loss": 0.6412, "step": 6999 }, { - "epoch": 0.74, - "grad_norm": 3.893898766295147, - "learning_rate": 1.7119017646250346e-06, - "loss": 0.5332, + "epoch": 0.5, + "grad_norm": 3.042380486167342, + "learning_rate": 5.295737785398698e-06, + "loss": 0.543, "step": 7000 }, { - "epoch": 0.74, - "grad_norm": 2.333401918284876, - "learning_rate": 1.7106180676203743e-06, - "loss": 0.644, + "epoch": 0.5, + "grad_norm": 2.2117069758666794, + "learning_rate": 5.294590622534431e-06, + "loss": 0.5831, "step": 7001 }, { - "epoch": 0.74, - "grad_norm": 2.2776944296611905, - "learning_rate": 1.7093347527666e-06, - "loss": 0.6315, + "epoch": 0.5, + "grad_norm": 1.8353090998488797, + "learning_rate": 5.2934434441088925e-06, + "loss": 0.5486, "step": 7002 }, { - "epoch": 0.74, - "grad_norm": 2.4878470361517535, - "learning_rate": 1.708051820212801e-06, - "loss": 0.5557, + "epoch": 0.5, + "grad_norm": 1.5554162267991847, + "learning_rate": 5.292296250182676e-06, + "loss": 0.5056, "step": 7003 }, { - "epoch": 0.74, - "grad_norm": 0.9538072931011493, - "learning_rate": 1.7067692701080247e-06, - "loss": 0.556, + "epoch": 0.5, + "grad_norm": 0.7936267314306734, + "learning_rate": 5.29114904081638e-06, + "loss": 0.4493, "step": 7004 }, { - "epoch": 0.74, - "grad_norm": 3.936334663090163, - "learning_rate": 1.7054871026012748e-06, - "loss": 0.6332, + "epoch": 0.5, + "grad_norm": 1.9437582346297118, + "learning_rate": 5.290001816070606e-06, + "loss": 0.6191, "step": 7005 }, { - "epoch": 0.74, - "grad_norm": 2.569749185859605, - "learning_rate": 1.7042053178415114e-06, - "loss": 0.597, + "epoch": 0.5, + "grad_norm": 1.7426267450638366, + "learning_rate": 5.288854576005954e-06, + "loss": 0.4912, "step": 7006 }, { - "epoch": 0.74, - "grad_norm": 2.2112215878359844, - "learning_rate": 1.7029239159776468e-06, - "loss": 0.6083, + "epoch": 0.5, + "grad_norm": 2.5441189136279694, + "learning_rate": 5.287707320683023e-06, + "loss": 0.4929, "step": 7007 }, { - "epoch": 0.74, - "grad_norm": 2.9065918760961362, - "learning_rate": 1.7016428971585491e-06, - "loss": 0.6281, + "epoch": 0.5, + "grad_norm": 1.6649730677336312, + "learning_rate": 5.286560050162417e-06, + "loss": 0.4943, "step": 7008 }, { - "epoch": 0.74, - "grad_norm": 2.0826011656455226, - "learning_rate": 1.700362261533045e-06, - "loss": 0.5959, + "epoch": 0.5, + "grad_norm": 1.5173375610006026, + "learning_rate": 5.285412764504738e-06, + "loss": 0.5004, "step": 7009 }, { - "epoch": 0.74, - "grad_norm": 2.3233212826323664, - "learning_rate": 1.699082009249915e-06, - "loss": 0.6065, + "epoch": 0.5, + "grad_norm": 1.6472109180947638, + "learning_rate": 5.284265463770589e-06, + "loss": 0.5581, "step": 7010 }, { - "epoch": 0.74, - "grad_norm": 0.9508372060278683, - "learning_rate": 1.6978021404578986e-06, - "loss": 0.5127, + "epoch": 0.5, + "grad_norm": 1.8848417152411876, + "learning_rate": 5.2831181480205774e-06, + "loss": 0.5279, "step": 7011 }, { - "epoch": 0.74, - "grad_norm": 2.3456337360777826, - "learning_rate": 1.6965226553056807e-06, - "loss": 0.6536, + "epoch": 0.5, + "grad_norm": 1.4680674468460801, + "learning_rate": 5.281970817315304e-06, + "loss": 0.5379, "step": 7012 }, { - "epoch": 0.74, - "grad_norm": 2.740110007869177, - "learning_rate": 1.6952435539419114e-06, - "loss": 0.6519, + "epoch": 0.5, + "grad_norm": 1.6745404478908734, + "learning_rate": 5.280823471715377e-06, + "loss": 0.5391, "step": 7013 }, { - "epoch": 0.74, - "grad_norm": 2.285157779061375, - "learning_rate": 1.6939648365151929e-06, - "loss": 0.6668, + "epoch": 0.5, + "grad_norm": 1.587726940534131, + "learning_rate": 5.279676111281403e-06, + "loss": 0.5376, "step": 7014 }, { - "epoch": 0.74, - "grad_norm": 2.286468423276322, - "learning_rate": 1.692686503174084e-06, - "loss": 0.5346, + "epoch": 0.5, + "grad_norm": 1.7918703189754146, + "learning_rate": 5.278528736073989e-06, + "loss": 0.5453, "step": 7015 }, { - "epoch": 0.74, - "grad_norm": 3.544160329777678, - "learning_rate": 1.6914085540670972e-06, - "loss": 0.6256, + "epoch": 0.5, + "grad_norm": 1.561443119290921, + "learning_rate": 5.277381346153743e-06, + "loss": 0.578, "step": 7016 }, { - "epoch": 0.74, - "grad_norm": 2.6105527595546683, - "learning_rate": 1.6901309893426987e-06, - "loss": 0.5112, + "epoch": 0.5, + "grad_norm": 1.600014820375347, + "learning_rate": 5.276233941581274e-06, + "loss": 0.5423, "step": 7017 }, { - "epoch": 0.74, - "grad_norm": 2.824511640017726, - "learning_rate": 1.688853809149314e-06, - "loss": 0.6006, + "epoch": 0.5, + "grad_norm": 2.3037477893429505, + "learning_rate": 5.275086522417193e-06, + "loss": 0.565, "step": 7018 }, { - "epoch": 0.74, - "grad_norm": 2.421375917201986, - "learning_rate": 1.6875770136353237e-06, - "loss": 0.5986, + "epoch": 0.5, + "grad_norm": 0.781481554719757, + "learning_rate": 5.27393908872211e-06, + "loss": 0.4315, "step": 7019 }, { - "epoch": 0.74, - "grad_norm": 2.775614276256507, - "learning_rate": 1.686300602949061e-06, - "loss": 0.641, + "epoch": 0.5, + "grad_norm": 1.7953016018705, + "learning_rate": 5.272791640556636e-06, + "loss": 0.5108, "step": 7020 }, { - "epoch": 0.74, - "grad_norm": 2.8464697028833026, - "learning_rate": 1.6850245772388136e-06, - "loss": 0.6148, + "epoch": 0.5, + "grad_norm": 0.7581132920306195, + "learning_rate": 5.2716441779813835e-06, + "loss": 0.4193, "step": 7021 }, { - "epoch": 0.74, - "grad_norm": 2.6951000358839816, - "learning_rate": 1.6837489366528275e-06, - "loss": 0.5996, + "epoch": 0.5, + "grad_norm": 1.665117172115333, + "learning_rate": 5.270496701056964e-06, + "loss": 0.5374, "step": 7022 }, { - "epoch": 0.74, - "grad_norm": 2.2776028181000036, - "learning_rate": 1.6824736813393044e-06, - "loss": 0.5767, + "epoch": 0.5, + "grad_norm": 1.6103185218458027, + "learning_rate": 5.269349209843993e-06, + "loss": 0.5712, "step": 7023 }, { - "epoch": 0.74, - "grad_norm": 3.049024430013574, - "learning_rate": 1.6811988114464024e-06, - "loss": 0.5973, + "epoch": 0.5, + "grad_norm": 2.181553038881357, + "learning_rate": 5.268201704403082e-06, + "loss": 0.594, "step": 7024 }, { - "epoch": 0.74, - "grad_norm": 2.259489650346381, - "learning_rate": 1.6799243271222248e-06, - "loss": 0.6279, + "epoch": 0.5, + "grad_norm": 1.899422161505861, + "learning_rate": 5.2670541847948495e-06, + "loss": 0.6153, "step": 7025 }, { - "epoch": 0.74, - "grad_norm": 2.7237784454005047, - "learning_rate": 1.678650228514842e-06, - "loss": 0.6341, + "epoch": 0.5, + "grad_norm": 1.6008747066951738, + "learning_rate": 5.265906651079912e-06, + "loss": 0.5625, "step": 7026 }, { - "epoch": 0.74, - "grad_norm": 2.492400316742001, - "learning_rate": 1.677376515772275e-06, - "loss": 0.5853, + "epoch": 0.5, + "grad_norm": 1.767450567383687, + "learning_rate": 5.264759103318882e-06, + "loss": 0.4915, "step": 7027 }, { - "epoch": 0.74, - "grad_norm": 7.370329142260077, - "learning_rate": 1.6761031890425007e-06, - "loss": 0.6075, + "epoch": 0.5, + "grad_norm": 2.2237631575939423, + "learning_rate": 5.26361154157238e-06, + "loss": 0.5472, "step": 7028 }, { - "epoch": 0.74, - "grad_norm": 2.1958100915780823, - "learning_rate": 1.6748302484734496e-06, - "loss": 0.5913, + "epoch": 0.5, + "grad_norm": 4.17307929353586, + "learning_rate": 5.2624639659010225e-06, + "loss": 0.5209, "step": 7029 }, { - "epoch": 0.74, - "grad_norm": 7.685029763578393, - "learning_rate": 1.6735576942130066e-06, - "loss": 0.5991, + "epoch": 0.5, + "grad_norm": 1.4534541420189495, + "learning_rate": 5.261316376365429e-06, + "loss": 0.5337, "step": 7030 }, { - "epoch": 0.74, - "grad_norm": 2.0884424565510606, - "learning_rate": 1.672285526409015e-06, - "loss": 0.5875, + "epoch": 0.5, + "grad_norm": 1.7932631131380319, + "learning_rate": 5.260168773026218e-06, + "loss": 0.5628, "step": 7031 }, { - "epoch": 0.74, - "grad_norm": 4.244543977179305, - "learning_rate": 1.6710137452092728e-06, - "loss": 0.647, + "epoch": 0.5, + "grad_norm": 2.5482969468349235, + "learning_rate": 5.259021155944013e-06, + "loss": 0.5937, "step": 7032 }, { - "epoch": 0.74, - "grad_norm": 2.8555347187741154, - "learning_rate": 1.6697423507615307e-06, - "loss": 0.6764, + "epoch": 0.5, + "grad_norm": 1.7890710027161159, + "learning_rate": 5.257873525179433e-06, + "loss": 0.5143, "step": 7033 }, { - "epoch": 0.74, - "grad_norm": 2.943847721173569, - "learning_rate": 1.6684713432134935e-06, - "loss": 0.637, + "epoch": 0.5, + "grad_norm": 1.7563291540552648, + "learning_rate": 5.256725880793098e-06, + "loss": 0.5432, "step": 7034 }, { - "epoch": 0.74, - "grad_norm": 2.9179340893616694, - "learning_rate": 1.6672007227128256e-06, - "loss": 0.6128, + "epoch": 0.5, + "grad_norm": 1.8428281776517292, + "learning_rate": 5.255578222845632e-06, + "loss": 0.6, "step": 7035 }, { - "epoch": 0.74, - "grad_norm": 2.3128758180540188, - "learning_rate": 1.6659304894071437e-06, - "loss": 0.5963, + "epoch": 0.5, + "grad_norm": 1.6614926125640286, + "learning_rate": 5.25443055139766e-06, + "loss": 0.5366, "step": 7036 }, { - "epoch": 0.74, - "grad_norm": 2.279530033918136, - "learning_rate": 1.6646606434440216e-06, - "loss": 0.5943, + "epoch": 0.5, + "grad_norm": 1.80539304474754, + "learning_rate": 5.2532828665098025e-06, + "loss": 0.5498, "step": 7037 }, { - "epoch": 0.74, - "grad_norm": 2.4218365961921924, - "learning_rate": 1.6633911849709838e-06, - "loss": 0.5705, + "epoch": 0.5, + "grad_norm": 1.7329691396716165, + "learning_rate": 5.252135168242686e-06, + "loss": 0.5369, "step": 7038 }, { - "epoch": 0.74, - "grad_norm": 2.4252321627282565, - "learning_rate": 1.6621221141355114e-06, - "loss": 0.6823, + "epoch": 0.5, + "grad_norm": 0.7121229173875054, + "learning_rate": 5.2509874566569355e-06, + "loss": 0.4482, "step": 7039 }, { - "epoch": 0.74, - "grad_norm": 2.815209527008716, - "learning_rate": 1.6608534310850432e-06, - "loss": 0.6023, + "epoch": 0.5, + "grad_norm": 0.7755123416221916, + "learning_rate": 5.249839731813177e-06, + "loss": 0.4245, "step": 7040 }, { - "epoch": 0.74, - "grad_norm": 3.2696793670858457, - "learning_rate": 1.6595851359669723e-06, - "loss": 0.6344, + "epoch": 0.5, + "grad_norm": 1.6875027963204883, + "learning_rate": 5.248691993772038e-06, + "loss": 0.5662, "step": 7041 }, { - "epoch": 0.74, - "grad_norm": 3.048168501203653, - "learning_rate": 1.6583172289286447e-06, - "loss": 0.6739, + "epoch": 0.5, + "grad_norm": 2.863930373213115, + "learning_rate": 5.247544242594145e-06, + "loss": 0.5467, "step": 7042 }, { - "epoch": 0.74, - "grad_norm": 2.345623717215056, - "learning_rate": 1.6570497101173595e-06, - "loss": 0.5041, + "epoch": 0.5, + "grad_norm": 1.691302569286533, + "learning_rate": 5.2463964783401265e-06, + "loss": 0.5497, "step": 7043 }, { - "epoch": 0.74, - "grad_norm": 2.656636229228406, - "learning_rate": 1.6557825796803755e-06, - "loss": 0.5783, + "epoch": 0.5, + "grad_norm": 1.6491347490759714, + "learning_rate": 5.245248701070611e-06, + "loss": 0.5737, "step": 7044 }, { - "epoch": 0.74, - "grad_norm": 2.047758363020559, - "learning_rate": 1.6545158377649063e-06, - "loss": 0.6101, + "epoch": 0.5, + "grad_norm": 1.7061477169836194, + "learning_rate": 5.244100910846227e-06, + "loss": 0.487, "step": 7045 }, { - "epoch": 0.74, - "grad_norm": 2.4301367175325446, - "learning_rate": 1.6532494845181157e-06, - "loss": 0.6579, + "epoch": 0.5, + "grad_norm": 1.7896817768077522, + "learning_rate": 5.242953107727606e-06, + "loss": 0.5474, "step": 7046 }, { - "epoch": 0.74, - "grad_norm": 2.4798435707976627, - "learning_rate": 1.6519835200871243e-06, - "loss": 0.6268, + "epoch": 0.5, + "grad_norm": 1.7925734906997224, + "learning_rate": 5.24180529177538e-06, + "loss": 0.5845, "step": 7047 }, { - "epoch": 0.74, - "grad_norm": 2.2104318045494717, - "learning_rate": 1.6507179446190091e-06, - "loss": 0.5432, + "epoch": 0.5, + "grad_norm": 1.7593264382668896, + "learning_rate": 5.240657463050179e-06, + "loss": 0.5176, "step": 7048 }, { - "epoch": 0.74, - "grad_norm": 4.720179561419772, - "learning_rate": 1.649452758260801e-06, - "loss": 0.5763, + "epoch": 0.5, + "grad_norm": 1.6207228830169353, + "learning_rate": 5.239509621612635e-06, + "loss": 0.557, "step": 7049 }, { - "epoch": 0.74, - "grad_norm": 2.8768611462675713, - "learning_rate": 1.648187961159488e-06, - "loss": 0.5999, + "epoch": 0.5, + "grad_norm": 1.4434230321433479, + "learning_rate": 5.238361767523381e-06, + "loss": 0.5074, "step": 7050 }, { - "epoch": 0.74, - "grad_norm": 2.9574492500850096, - "learning_rate": 1.6469235534620087e-06, - "loss": 0.6756, + "epoch": 0.5, + "grad_norm": 1.662484952456173, + "learning_rate": 5.2372139008430515e-06, + "loss": 0.5512, "step": 7051 }, { - "epoch": 0.74, - "grad_norm": 2.4478650282919654, - "learning_rate": 1.6456595353152566e-06, - "loss": 0.6178, + "epoch": 0.5, + "grad_norm": 1.7697828381047866, + "learning_rate": 5.236066021632279e-06, + "loss": 0.5605, "step": 7052 }, { - "epoch": 0.74, - "grad_norm": 3.4521440344822647, - "learning_rate": 1.644395906866083e-06, - "loss": 0.633, + "epoch": 0.5, + "grad_norm": 1.7238588829708457, + "learning_rate": 5.2349181299517e-06, + "loss": 0.5587, "step": 7053 }, { - "epoch": 0.74, - "grad_norm": 2.3845856771271645, - "learning_rate": 1.6431326682612947e-06, - "loss": 0.6037, + "epoch": 0.5, + "grad_norm": 1.7134398280710335, + "learning_rate": 5.233770225861947e-06, + "loss": 0.4846, "step": 7054 }, { - "epoch": 0.74, - "grad_norm": 1.9893993557633762, - "learning_rate": 1.6418698196476497e-06, - "loss": 0.5822, + "epoch": 0.5, + "grad_norm": 2.186337835633182, + "learning_rate": 5.232622309423661e-06, + "loss": 0.5014, "step": 7055 }, { - "epoch": 0.74, - "grad_norm": 2.502449539506666, - "learning_rate": 1.6406073611718593e-06, - "loss": 0.6611, + "epoch": 0.5, + "grad_norm": 1.6732215935618153, + "learning_rate": 5.231474380697476e-06, + "loss": 0.5656, "step": 7056 }, { - "epoch": 0.74, - "grad_norm": 2.886805930275283, - "learning_rate": 1.6393452929805947e-06, - "loss": 0.6163, + "epoch": 0.5, + "grad_norm": 1.8241033033759355, + "learning_rate": 5.23032643974403e-06, + "loss": 0.5256, "step": 7057 }, { - "epoch": 0.74, - "grad_norm": 2.324217731618782, - "learning_rate": 1.63808361522048e-06, - "loss": 0.6323, + "epoch": 0.5, + "grad_norm": 1.6781383724471364, + "learning_rate": 5.229178486623962e-06, + "loss": 0.4735, "step": 7058 }, { - "epoch": 0.74, - "grad_norm": 2.4841967196947907, - "learning_rate": 1.6368223280380951e-06, - "loss": 0.6249, + "epoch": 0.5, + "grad_norm": 1.6439613492732295, + "learning_rate": 5.228030521397908e-06, + "loss": 0.5308, "step": 7059 }, { - "epoch": 0.74, - "grad_norm": 2.491514087882754, - "learning_rate": 1.6355614315799673e-06, - "loss": 0.5953, + "epoch": 0.5, + "grad_norm": 1.570317505145271, + "learning_rate": 5.226882544126508e-06, + "loss": 0.4488, "step": 7060 }, { - "epoch": 0.74, - "grad_norm": 4.09150523009862, - "learning_rate": 1.6343009259925863e-06, - "loss": 0.663, + "epoch": 0.5, + "grad_norm": 1.6983211137879015, + "learning_rate": 5.2257345548704055e-06, + "loss": 0.479, "step": 7061 }, { - "epoch": 0.74, - "grad_norm": 2.642962343711459, - "learning_rate": 1.633040811422395e-06, - "loss": 0.6604, + "epoch": 0.5, + "grad_norm": 2.043963367654638, + "learning_rate": 5.224586553690238e-06, + "loss": 0.5632, "step": 7062 }, { - "epoch": 0.74, - "grad_norm": 2.222557968930457, - "learning_rate": 1.6317810880157908e-06, - "loss": 0.5329, + "epoch": 0.5, + "grad_norm": 1.7285121275782998, + "learning_rate": 5.223438540646648e-06, + "loss": 0.573, "step": 7063 }, { - "epoch": 0.74, - "grad_norm": 2.7336550978700083, - "learning_rate": 1.6305217559191232e-06, - "loss": 0.5325, + "epoch": 0.5, + "grad_norm": 2.2996037055143477, + "learning_rate": 5.222290515800279e-06, + "loss": 0.6057, "step": 7064 }, { - "epoch": 0.74, - "grad_norm": 2.1083210565732484, - "learning_rate": 1.629262815278696e-06, - "loss": 0.5392, + "epoch": 0.5, + "grad_norm": 1.5247382689469942, + "learning_rate": 5.22114247921177e-06, + "loss": 0.4863, "step": 7065 }, { - "epoch": 0.74, - "grad_norm": 2.720575901544298, - "learning_rate": 1.6280042662407714e-06, - "loss": 0.6113, + "epoch": 0.5, + "grad_norm": 2.1986877183367146, + "learning_rate": 5.219994430941766e-06, + "loss": 0.5418, "step": 7066 }, { - "epoch": 0.74, - "grad_norm": 2.16404863181636, - "learning_rate": 1.626746108951565e-06, - "loss": 0.5831, + "epoch": 0.5, + "grad_norm": 1.7643448686306238, + "learning_rate": 5.218846371050909e-06, + "loss": 0.5751, "step": 7067 }, { - "epoch": 0.74, - "grad_norm": 2.4115471190550117, - "learning_rate": 1.6254883435572449e-06, - "loss": 0.59, + "epoch": 0.5, + "grad_norm": 1.6281119541807476, + "learning_rate": 5.2176982995998475e-06, + "loss": 0.5506, "step": 7068 }, { - "epoch": 0.74, - "grad_norm": 3.0583452239282183, - "learning_rate": 1.6242309702039327e-06, - "loss": 0.6492, + "epoch": 0.5, + "grad_norm": 1.5186360542138146, + "learning_rate": 5.216550216649224e-06, + "loss": 0.57, "step": 7069 }, { - "epoch": 0.74, - "grad_norm": 3.2718246037744843, - "learning_rate": 1.6229739890377084e-06, - "loss": 0.5989, + "epoch": 0.5, + "grad_norm": 1.5945754296133121, + "learning_rate": 5.215402122259683e-06, + "loss": 0.6253, "step": 7070 }, { - "epoch": 0.74, - "grad_norm": 2.7419306661734812, - "learning_rate": 1.6217174002046032e-06, - "loss": 0.5111, + "epoch": 0.5, + "grad_norm": 1.8557963536463613, + "learning_rate": 5.214254016491872e-06, + "loss": 0.524, "step": 7071 }, { - "epoch": 0.74, - "grad_norm": 3.0816295246885725, - "learning_rate": 1.6204612038506068e-06, - "loss": 0.639, + "epoch": 0.5, + "grad_norm": 1.9320802794681051, + "learning_rate": 5.21310589940644e-06, + "loss": 0.5034, "step": 7072 }, { - "epoch": 0.74, - "grad_norm": 2.2970090731001003, - "learning_rate": 1.6192054001216585e-06, - "loss": 0.6099, + "epoch": 0.5, + "grad_norm": 1.560260248752983, + "learning_rate": 5.21195777106403e-06, + "loss": 0.4977, "step": 7073 }, { - "epoch": 0.74, - "grad_norm": 2.7790697280686962, - "learning_rate": 1.6179499891636524e-06, - "loss": 0.6486, + "epoch": 0.5, + "grad_norm": 1.7071366758187072, + "learning_rate": 5.2108096315252935e-06, + "loss": 0.5265, "step": 7074 }, { - "epoch": 0.74, - "grad_norm": 2.6588858988791624, - "learning_rate": 1.61669497112244e-06, - "loss": 0.6991, + "epoch": 0.5, + "grad_norm": 2.1059907136718805, + "learning_rate": 5.209661480850877e-06, + "loss": 0.5011, "step": 7075 }, { - "epoch": 0.74, - "grad_norm": 2.4764296283414584, - "learning_rate": 1.6154403461438273e-06, - "loss": 0.6273, + "epoch": 0.5, + "grad_norm": 1.642852960823384, + "learning_rate": 5.20851331910143e-06, + "loss": 0.5834, "step": 7076 }, { - "epoch": 0.74, - "grad_norm": 2.658819160311856, - "learning_rate": 1.6141861143735716e-06, - "loss": 0.5699, + "epoch": 0.5, + "grad_norm": 0.8343008879961659, + "learning_rate": 5.207365146337605e-06, + "loss": 0.4298, "step": 7077 }, { - "epoch": 0.74, - "grad_norm": 2.315234814191497, - "learning_rate": 1.612932275957384e-06, - "loss": 0.6531, + "epoch": 0.5, + "grad_norm": 1.7746126792704355, + "learning_rate": 5.20621696262005e-06, + "loss": 0.5902, "step": 7078 }, { - "epoch": 0.74, - "grad_norm": 4.409181763534464, - "learning_rate": 1.6116788310409332e-06, - "loss": 0.5654, + "epoch": 0.5, + "grad_norm": 1.6031878891625848, + "learning_rate": 5.205068768009416e-06, + "loss": 0.5794, "step": 7079 }, { - "epoch": 0.75, - "grad_norm": 2.573872111742514, - "learning_rate": 1.6104257797698431e-06, - "loss": 0.6174, + "epoch": 0.5, + "grad_norm": 0.8030260679962491, + "learning_rate": 5.203920562566354e-06, + "loss": 0.4611, "step": 7080 }, { - "epoch": 0.75, - "grad_norm": 2.1867723143982847, - "learning_rate": 1.6091731222896877e-06, - "loss": 0.6744, + "epoch": 0.5, + "grad_norm": 1.5772106680268816, + "learning_rate": 5.202772346351518e-06, + "loss": 0.4488, "step": 7081 }, { - "epoch": 0.75, - "grad_norm": 2.436773932857918, - "learning_rate": 1.6079208587459954e-06, - "loss": 0.6701, + "epoch": 0.5, + "grad_norm": 1.699718614123332, + "learning_rate": 5.201624119425559e-06, + "loss": 0.4872, "step": 7082 }, { - "epoch": 0.75, - "grad_norm": 2.6539143787518165, - "learning_rate": 1.6066689892842525e-06, - "loss": 0.5861, + "epoch": 0.5, + "grad_norm": 0.6660990822422064, + "learning_rate": 5.200475881849131e-06, + "loss": 0.4194, "step": 7083 }, { - "epoch": 0.75, - "grad_norm": 2.9851718926572968, - "learning_rate": 1.6054175140498967e-06, - "loss": 0.6667, + "epoch": 0.5, + "grad_norm": 2.421458284083621, + "learning_rate": 5.1993276336828865e-06, + "loss": 0.5867, "step": 7084 }, { - "epoch": 0.75, - "grad_norm": 2.130011313816518, - "learning_rate": 1.6041664331883233e-06, - "loss": 0.6442, + "epoch": 0.5, + "grad_norm": 1.6057217818039338, + "learning_rate": 5.198179374987483e-06, + "loss": 0.4803, "step": 7085 }, { - "epoch": 0.75, - "grad_norm": 2.8378492504021393, - "learning_rate": 1.6029157468448775e-06, - "loss": 0.6244, + "epoch": 0.5, + "grad_norm": 3.789475905772603, + "learning_rate": 5.197031105823572e-06, + "loss": 0.5598, "step": 7086 }, { - "epoch": 0.75, - "grad_norm": 2.2783353748879542, - "learning_rate": 1.601665455164858e-06, - "loss": 0.6466, + "epoch": 0.5, + "grad_norm": 1.6214953556258676, + "learning_rate": 5.1958828262518126e-06, + "loss": 0.4811, "step": 7087 }, { - "epoch": 0.75, - "grad_norm": 3.5318917740950577, - "learning_rate": 1.6004155582935232e-06, - "loss": 0.7144, + "epoch": 0.5, + "grad_norm": 1.5564539833113114, + "learning_rate": 5.194734536332856e-06, + "loss": 0.4883, "step": 7088 }, { - "epoch": 0.75, - "grad_norm": 2.405042680383137, - "learning_rate": 1.599166056376083e-06, - "loss": 0.554, + "epoch": 0.5, + "grad_norm": 1.6105748242494609, + "learning_rate": 5.193586236127362e-06, + "loss": 0.5338, "step": 7089 }, { - "epoch": 0.75, - "grad_norm": 2.537663239731998, - "learning_rate": 1.5979169495576991e-06, - "loss": 0.6213, + "epoch": 0.5, + "grad_norm": 1.6914237757829251, + "learning_rate": 5.1924379256959865e-06, + "loss": 0.4805, "step": 7090 }, { - "epoch": 0.75, - "grad_norm": 2.34724684013334, - "learning_rate": 1.5966682379834887e-06, - "loss": 0.5951, + "epoch": 0.5, + "grad_norm": 1.3561994336142162, + "learning_rate": 5.1912896050993875e-06, + "loss": 0.5041, "step": 7091 }, { - "epoch": 0.75, - "grad_norm": 2.683042293367126, - "learning_rate": 1.5954199217985233e-06, - "loss": 0.6249, + "epoch": 0.5, + "grad_norm": 2.42275735902914, + "learning_rate": 5.190141274398225e-06, + "loss": 0.4571, "step": 7092 }, { - "epoch": 0.75, - "grad_norm": 2.962353374507347, - "learning_rate": 1.5941720011478323e-06, - "loss": 0.5715, + "epoch": 0.5, + "grad_norm": 1.5627965092120926, + "learning_rate": 5.188992933653155e-06, + "loss": 0.545, "step": 7093 }, { - "epoch": 0.75, - "grad_norm": 2.6239346592286377, - "learning_rate": 1.5929244761763924e-06, - "loss": 0.5831, + "epoch": 0.5, + "grad_norm": 2.324944348807185, + "learning_rate": 5.1878445829248395e-06, + "loss": 0.5163, "step": 7094 }, { - "epoch": 0.75, - "grad_norm": 2.338124980340621, - "learning_rate": 1.591677347029137e-06, - "loss": 0.576, + "epoch": 0.5, + "grad_norm": 1.6378099612781631, + "learning_rate": 5.1866962222739346e-06, + "loss": 0.5363, "step": 7095 }, { - "epoch": 0.75, - "grad_norm": 3.8481926089677714, - "learning_rate": 1.5904306138509545e-06, - "loss": 0.6755, + "epoch": 0.5, + "grad_norm": 1.605883906034545, + "learning_rate": 5.185547851761102e-06, + "loss": 0.5504, "step": 7096 }, { - "epoch": 0.75, - "grad_norm": 2.540089155983873, - "learning_rate": 1.5891842767866872e-06, - "loss": 0.5622, + "epoch": 0.5, + "grad_norm": 0.8669754574836585, + "learning_rate": 5.184399471447005e-06, + "loss": 0.4331, "step": 7097 }, { - "epoch": 0.75, - "grad_norm": 2.4998455761237133, - "learning_rate": 1.587938335981133e-06, - "loss": 0.5839, + "epoch": 0.5, + "grad_norm": 1.6836662874908435, + "learning_rate": 5.183251081392303e-06, + "loss": 0.6004, "step": 7098 }, { - "epoch": 0.75, - "grad_norm": 3.281273941072614, - "learning_rate": 1.5866927915790391e-06, - "loss": 0.6369, + "epoch": 0.5, + "grad_norm": 2.2619939821475747, + "learning_rate": 5.182102681657657e-06, + "loss": 0.5065, "step": 7099 }, { - "epoch": 0.75, - "grad_norm": 2.401257413792379, - "learning_rate": 1.585447643725108e-06, - "loss": 0.6927, + "epoch": 0.5, + "grad_norm": 1.6678149143204306, + "learning_rate": 5.180954272303732e-06, + "loss": 0.4787, "step": 7100 }, { - "epoch": 0.75, - "grad_norm": 2.885694164271074, - "learning_rate": 1.5842028925640002e-06, - "loss": 0.5983, + "epoch": 0.5, + "grad_norm": 1.8957925922309402, + "learning_rate": 5.179805853391187e-06, + "loss": 0.6099, "step": 7101 }, { - "epoch": 0.75, - "grad_norm": 2.249464206134762, - "learning_rate": 1.5829585382403273e-06, - "loss": 0.552, + "epoch": 0.5, + "grad_norm": 1.4585668255867563, + "learning_rate": 5.1786574249806895e-06, + "loss": 0.504, "step": 7102 }, { - "epoch": 0.75, - "grad_norm": 2.2266693760673335, - "learning_rate": 1.5817145808986534e-06, - "loss": 0.6486, + "epoch": 0.5, + "grad_norm": 1.8331054119238788, + "learning_rate": 5.1775089871329e-06, + "loss": 0.5226, "step": 7103 }, { - "epoch": 0.75, - "grad_norm": 2.6048570897327545, - "learning_rate": 1.5804710206834972e-06, - "loss": 0.6163, + "epoch": 0.5, + "grad_norm": 1.7182541145377153, + "learning_rate": 5.1763605399084846e-06, + "loss": 0.5484, "step": 7104 }, { - "epoch": 0.75, - "grad_norm": 2.52361431641286, - "learning_rate": 1.5792278577393327e-06, - "loss": 0.5478, + "epoch": 0.5, + "grad_norm": 3.2294353557351494, + "learning_rate": 5.175212083368108e-06, + "loss": 0.5779, "step": 7105 }, { - "epoch": 0.75, - "grad_norm": 3.0440511092050317, - "learning_rate": 1.577985092210587e-06, - "loss": 0.648, + "epoch": 0.5, + "grad_norm": 1.5731470206466514, + "learning_rate": 5.174063617572435e-06, + "loss": 0.5827, "step": 7106 }, { - "epoch": 0.75, - "grad_norm": 2.5613872240202467, - "learning_rate": 1.5767427242416433e-06, - "loss": 0.6007, + "epoch": 0.5, + "grad_norm": 1.558400605808407, + "learning_rate": 5.172915142582132e-06, + "loss": 0.529, "step": 7107 }, { - "epoch": 0.75, - "grad_norm": 2.4150239405907303, - "learning_rate": 1.575500753976834e-06, - "loss": 0.5187, + "epoch": 0.5, + "grad_norm": 0.7873223166390495, + "learning_rate": 5.171766658457866e-06, + "loss": 0.4315, "step": 7108 }, { - "epoch": 0.75, - "grad_norm": 3.236040550601962, - "learning_rate": 1.5742591815604463e-06, - "loss": 0.6156, + "epoch": 0.5, + "grad_norm": 1.8004574508128999, + "learning_rate": 5.170618165260304e-06, + "loss": 0.513, "step": 7109 }, { - "epoch": 0.75, - "grad_norm": 2.8872460453894053, - "learning_rate": 1.5730180071367247e-06, - "loss": 0.6328, + "epoch": 0.5, + "grad_norm": 1.4179682013113315, + "learning_rate": 5.1694696630501104e-06, + "loss": 0.5089, "step": 7110 }, { - "epoch": 0.75, - "grad_norm": 2.329877740414494, - "learning_rate": 1.5717772308498651e-06, - "loss": 0.6179, + "epoch": 0.5, + "grad_norm": 2.0751890925869887, + "learning_rate": 5.168321151887955e-06, + "loss": 0.5386, "step": 7111 }, { - "epoch": 0.75, - "grad_norm": 2.635293780550099, - "learning_rate": 1.5705368528440178e-06, - "loss": 0.6769, + "epoch": 0.5, + "grad_norm": 1.6926506016262834, + "learning_rate": 5.167172631834506e-06, + "loss": 0.5428, "step": 7112 }, { - "epoch": 0.75, - "grad_norm": 2.5346783855495905, - "learning_rate": 1.569296873263283e-06, - "loss": 0.4746, + "epoch": 0.5, + "grad_norm": 1.4481960942188872, + "learning_rate": 5.166024102950434e-06, + "loss": 0.4843, "step": 7113 }, { - "epoch": 0.75, - "grad_norm": 2.1425884362151524, - "learning_rate": 1.5680572922517206e-06, - "loss": 0.5694, + "epoch": 0.5, + "grad_norm": 0.7325949117503594, + "learning_rate": 5.1648755652964044e-06, + "loss": 0.462, "step": 7114 }, { - "epoch": 0.75, - "grad_norm": 2.1871638635512443, - "learning_rate": 1.5668181099533431e-06, - "loss": 0.5599, + "epoch": 0.5, + "grad_norm": 1.8990580287602454, + "learning_rate": 5.163727018933091e-06, + "loss": 0.4894, "step": 7115 }, { - "epoch": 0.75, - "grad_norm": 2.6126133591306533, - "learning_rate": 1.5655793265121132e-06, - "loss": 0.6576, + "epoch": 0.5, + "grad_norm": 2.1818327692016455, + "learning_rate": 5.162578463921159e-06, + "loss": 0.5333, "step": 7116 }, { - "epoch": 0.75, - "grad_norm": 3.6620967020652584, - "learning_rate": 1.5643409420719475e-06, - "loss": 0.7142, + "epoch": 0.51, + "grad_norm": 1.512931452219238, + "learning_rate": 5.1614299003212846e-06, + "loss": 0.5845, "step": 7117 }, { - "epoch": 0.75, - "grad_norm": 2.539598366314506, - "learning_rate": 1.5631029567767197e-06, - "loss": 0.6241, + "epoch": 0.51, + "grad_norm": 1.7527905185203418, + "learning_rate": 5.160281328194134e-06, + "loss": 0.4174, "step": 7118 }, { - "epoch": 0.75, - "grad_norm": 3.873095508009267, - "learning_rate": 1.5618653707702553e-06, - "loss": 0.6067, + "epoch": 0.51, + "grad_norm": 1.5378745881261426, + "learning_rate": 5.159132747600379e-06, + "loss": 0.5251, "step": 7119 }, { - "epoch": 0.75, - "grad_norm": 3.2962610827522454, - "learning_rate": 1.560628184196335e-06, - "loss": 0.5866, + "epoch": 0.51, + "grad_norm": 1.6899676709529583, + "learning_rate": 5.157984158600695e-06, + "loss": 0.5712, "step": 7120 }, { - "epoch": 0.75, - "grad_norm": 2.418665449353965, - "learning_rate": 1.55939139719869e-06, - "loss": 0.5084, + "epoch": 0.51, + "grad_norm": 0.7542740258759251, + "learning_rate": 5.156835561255752e-06, + "loss": 0.4498, "step": 7121 }, { - "epoch": 0.75, - "grad_norm": 54.21175226317471, - "learning_rate": 1.5581550099210053e-06, - "loss": 0.5915, + "epoch": 0.51, + "grad_norm": 1.6807544819822395, + "learning_rate": 5.155686955626223e-06, + "loss": 0.5263, "step": 7122 }, { - "epoch": 0.75, - "grad_norm": 7.5068547209902965, - "learning_rate": 1.5569190225069226e-06, - "loss": 0.6405, + "epoch": 0.51, + "grad_norm": 2.686875036578096, + "learning_rate": 5.1545383417727825e-06, + "loss": 0.5701, "step": 7123 }, { - "epoch": 0.75, - "grad_norm": 2.7309517743093017, - "learning_rate": 1.5556834351000356e-06, - "loss": 0.6173, + "epoch": 0.51, + "grad_norm": 1.620194273283092, + "learning_rate": 5.153389719756102e-06, + "loss": 0.4982, "step": 7124 }, { - "epoch": 0.75, - "grad_norm": 1.0711077498083168, - "learning_rate": 1.554448247843891e-06, - "loss": 0.5749, + "epoch": 0.51, + "grad_norm": 2.138943393903928, + "learning_rate": 5.152241089636858e-06, + "loss": 0.5592, "step": 7125 }, { - "epoch": 0.75, - "grad_norm": 2.4429328414022162, - "learning_rate": 1.5532134608819876e-06, - "loss": 0.5991, + "epoch": 0.51, + "grad_norm": 2.1955589872455876, + "learning_rate": 5.151092451475722e-06, + "loss": 0.5327, "step": 7126 }, { - "epoch": 0.75, - "grad_norm": 4.816964217476705, - "learning_rate": 1.55197907435778e-06, - "loss": 0.6666, + "epoch": 0.51, + "grad_norm": 1.5091118869387572, + "learning_rate": 5.149943805333372e-06, + "loss": 0.4988, "step": 7127 }, { - "epoch": 0.75, - "grad_norm": 2.535197145555227, - "learning_rate": 1.5507450884146784e-06, - "loss": 0.5548, + "epoch": 0.51, + "grad_norm": 1.7666169329152845, + "learning_rate": 5.148795151270483e-06, + "loss": 0.5643, "step": 7128 }, { - "epoch": 0.75, - "grad_norm": 2.319066286953146, - "learning_rate": 1.5495115031960418e-06, - "loss": 0.6441, + "epoch": 0.51, + "grad_norm": 2.189725432687955, + "learning_rate": 5.147646489347727e-06, + "loss": 0.5813, "step": 7129 }, { - "epoch": 0.75, - "grad_norm": 2.8047667310373727, - "learning_rate": 1.5482783188451822e-06, - "loss": 0.6417, + "epoch": 0.51, + "grad_norm": 1.5298783823772737, + "learning_rate": 5.146497819625787e-06, + "loss": 0.5618, "step": 7130 }, { - "epoch": 0.75, - "grad_norm": 3.9106777141475417, - "learning_rate": 1.5470455355053704e-06, - "loss": 0.6389, + "epoch": 0.51, + "grad_norm": 1.8082665095520953, + "learning_rate": 5.145349142165333e-06, + "loss": 0.5304, "step": 7131 }, { - "epoch": 0.75, - "grad_norm": 2.6056116741953828, - "learning_rate": 1.5458131533198261e-06, - "loss": 0.6165, + "epoch": 0.51, + "grad_norm": 2.3450208913873527, + "learning_rate": 5.144200457027045e-06, + "loss": 0.5225, "step": 7132 }, { - "epoch": 0.75, - "grad_norm": 2.9784665393315755, - "learning_rate": 1.5445811724317267e-06, - "loss": 0.584, + "epoch": 0.51, + "grad_norm": 0.6968448027459868, + "learning_rate": 5.143051764271599e-06, + "loss": 0.4126, "step": 7133 }, { - "epoch": 0.75, - "grad_norm": 2.057328186390915, - "learning_rate": 1.5433495929841979e-06, - "loss": 0.538, + "epoch": 0.51, + "grad_norm": 1.961580709313409, + "learning_rate": 5.141903063959677e-06, + "loss": 0.5133, "step": 7134 }, { - "epoch": 0.75, - "grad_norm": 3.899774453962374, - "learning_rate": 1.5421184151203194e-06, - "loss": 0.6381, + "epoch": 0.51, + "grad_norm": 0.7471798602529724, + "learning_rate": 5.1407543561519535e-06, + "loss": 0.4694, "step": 7135 }, { - "epoch": 0.75, - "grad_norm": 2.302231817534724, - "learning_rate": 1.5408876389831278e-06, - "loss": 0.5251, + "epoch": 0.51, + "grad_norm": 4.038518251440893, + "learning_rate": 5.139605640909108e-06, + "loss": 0.5118, "step": 7136 }, { - "epoch": 0.75, - "grad_norm": 2.779434622195697, - "learning_rate": 1.5396572647156126e-06, - "loss": 0.6052, + "epoch": 0.51, + "grad_norm": 1.7778208913877238, + "learning_rate": 5.138456918291819e-06, + "loss": 0.5357, "step": 7137 }, { - "epoch": 0.75, - "grad_norm": 2.6994213766514155, - "learning_rate": 1.538427292460714e-06, - "loss": 0.6085, + "epoch": 0.51, + "grad_norm": 1.4885933662221587, + "learning_rate": 5.137308188360765e-06, + "loss": 0.5141, "step": 7138 }, { - "epoch": 0.75, - "grad_norm": 2.504683013737572, - "learning_rate": 1.5371977223613238e-06, - "loss": 0.6257, + "epoch": 0.51, + "grad_norm": 1.9774818655028785, + "learning_rate": 5.13615945117663e-06, + "loss": 0.4445, "step": 7139 }, { - "epoch": 0.75, - "grad_norm": 2.2855288271378664, - "learning_rate": 1.535968554560293e-06, - "loss": 0.6142, + "epoch": 0.51, + "grad_norm": 1.8716662307838772, + "learning_rate": 5.13501070680009e-06, + "loss": 0.5681, "step": 7140 }, { - "epoch": 0.75, - "grad_norm": 2.4654873006023608, - "learning_rate": 1.5347397892004234e-06, - "loss": 0.6748, + "epoch": 0.51, + "grad_norm": 1.810768876060837, + "learning_rate": 5.133861955291827e-06, + "loss": 0.5241, "step": 7141 }, { - "epoch": 0.75, - "grad_norm": 2.2166955270138478, - "learning_rate": 1.533511426424466e-06, - "loss": 0.5971, + "epoch": 0.51, + "grad_norm": 1.5727414963843227, + "learning_rate": 5.132713196712521e-06, + "loss": 0.5365, "step": 7142 }, { - "epoch": 0.75, - "grad_norm": 2.5878170941910934, - "learning_rate": 1.532283466375133e-06, - "loss": 0.5781, + "epoch": 0.51, + "grad_norm": 2.918848231131799, + "learning_rate": 5.131564431122857e-06, + "loss": 0.5212, "step": 7143 }, { - "epoch": 0.75, - "grad_norm": 3.088438430805986, - "learning_rate": 1.5310559091950805e-06, - "loss": 0.5924, + "epoch": 0.51, + "grad_norm": 0.6825396948884368, + "learning_rate": 5.1304156585835116e-06, + "loss": 0.4365, "step": 7144 }, { - "epoch": 0.75, - "grad_norm": 3.455905487244043, - "learning_rate": 1.5298287550269248e-06, - "loss": 0.6324, + "epoch": 0.51, + "grad_norm": 1.8334080262118002, + "learning_rate": 5.1292668791551705e-06, + "loss": 0.5033, "step": 7145 }, { - "epoch": 0.75, - "grad_norm": 2.296776703964561, - "learning_rate": 1.5286020040132344e-06, - "loss": 0.6318, + "epoch": 0.51, + "grad_norm": 1.854349327819781, + "learning_rate": 5.128118092898514e-06, + "loss": 0.5805, "step": 7146 }, { - "epoch": 0.75, - "grad_norm": 3.502122104084021, - "learning_rate": 1.5273756562965286e-06, - "loss": 0.5917, + "epoch": 0.51, + "grad_norm": 2.127937879591351, + "learning_rate": 5.126969299874225e-06, + "loss": 0.5655, "step": 7147 }, { - "epoch": 0.75, - "grad_norm": 2.813679350238381, - "learning_rate": 1.5261497120192791e-06, - "loss": 0.7005, + "epoch": 0.51, + "grad_norm": 1.863725899590818, + "learning_rate": 5.125820500142988e-06, + "loss": 0.5844, "step": 7148 }, { - "epoch": 0.75, - "grad_norm": 2.386900010136105, - "learning_rate": 1.5249241713239148e-06, - "loss": 0.6208, + "epoch": 0.51, + "grad_norm": 2.813888497787206, + "learning_rate": 5.124671693765487e-06, + "loss": 0.4989, "step": 7149 }, { - "epoch": 0.75, - "grad_norm": 2.097816570449662, - "learning_rate": 1.5236990343528157e-06, - "loss": 0.5714, + "epoch": 0.51, + "grad_norm": 1.6518913501196617, + "learning_rate": 5.123522880802403e-06, + "loss": 0.5096, "step": 7150 }, { - "epoch": 0.75, - "grad_norm": 2.8641415092943023, - "learning_rate": 1.522474301248314e-06, - "loss": 0.6228, + "epoch": 0.51, + "grad_norm": 2.7558537853024045, + "learning_rate": 5.122374061314424e-06, + "loss": 0.5331, "step": 7151 }, { - "epoch": 0.75, - "grad_norm": 2.953458669418631, - "learning_rate": 1.5212499721526942e-06, - "loss": 0.635, + "epoch": 0.51, + "grad_norm": 1.9676623770108665, + "learning_rate": 5.121225235362231e-06, + "loss": 0.5326, "step": 7152 }, { - "epoch": 0.75, - "grad_norm": 2.293638293253605, - "learning_rate": 1.5200260472081962e-06, - "loss": 0.6122, + "epoch": 0.51, + "grad_norm": 2.2094081635692677, + "learning_rate": 5.120076403006511e-06, + "loss": 0.4963, "step": 7153 }, { - "epoch": 0.75, - "grad_norm": 2.165393072045299, - "learning_rate": 1.5188025265570127e-06, - "loss": 0.5967, + "epoch": 0.51, + "grad_norm": 0.6927926488967263, + "learning_rate": 5.118927564307947e-06, + "loss": 0.4714, "step": 7154 }, { - "epoch": 0.75, - "grad_norm": 2.6653448901727215, - "learning_rate": 1.51757941034129e-06, - "loss": 0.5992, + "epoch": 0.51, + "grad_norm": 1.6738943270583186, + "learning_rate": 5.1177787193272265e-06, + "loss": 0.5575, "step": 7155 }, { - "epoch": 0.75, - "grad_norm": 2.590367390269278, - "learning_rate": 1.5163566987031246e-06, - "loss": 0.5784, + "epoch": 0.51, + "grad_norm": 1.6185888292506552, + "learning_rate": 5.116629868125036e-06, + "loss": 0.5773, "step": 7156 }, { - "epoch": 0.75, - "grad_norm": 2.274011024448419, - "learning_rate": 1.5151343917845662e-06, - "loss": 0.5486, + "epoch": 0.51, + "grad_norm": 1.6562085052544793, + "learning_rate": 5.1154810107620585e-06, + "loss": 0.5734, "step": 7157 }, { - "epoch": 0.75, - "grad_norm": 2.640547755917543, - "learning_rate": 1.513912489727621e-06, - "loss": 0.6622, + "epoch": 0.51, + "grad_norm": 1.811707302117279, + "learning_rate": 5.114332147298986e-06, + "loss": 0.5661, "step": 7158 }, { - "epoch": 0.75, - "grad_norm": 3.435856702185333, - "learning_rate": 1.5126909926742461e-06, - "loss": 0.5952, + "epoch": 0.51, + "grad_norm": 2.114680323180185, + "learning_rate": 5.1131832777965e-06, + "loss": 0.5703, "step": 7159 }, { - "epoch": 0.75, - "grad_norm": 2.327306617131621, - "learning_rate": 1.511469900766352e-06, - "loss": 0.5737, + "epoch": 0.51, + "grad_norm": 0.7267628351405168, + "learning_rate": 5.11203440231529e-06, + "loss": 0.4345, "step": 7160 }, { - "epoch": 0.75, - "grad_norm": 2.954132593391894, - "learning_rate": 1.510249214145798e-06, - "loss": 0.6354, + "epoch": 0.51, + "grad_norm": 1.6916179885567026, + "learning_rate": 5.110885520916044e-06, + "loss": 0.5199, "step": 7161 }, { - "epoch": 0.75, - "grad_norm": 2.435417507636399, - "learning_rate": 1.5090289329544028e-06, - "loss": 0.658, + "epoch": 0.51, + "grad_norm": 1.850937205413982, + "learning_rate": 5.109736633659448e-06, + "loss": 0.5967, "step": 7162 }, { - "epoch": 0.75, - "grad_norm": 2.9854856200768136, - "learning_rate": 1.5078090573339365e-06, - "loss": 0.6282, + "epoch": 0.51, + "grad_norm": 3.1097579920131526, + "learning_rate": 5.1085877406061915e-06, + "loss": 0.5991, "step": 7163 }, { - "epoch": 0.75, - "grad_norm": 3.106673158220274, - "learning_rate": 1.506589587426119e-06, - "loss": 0.5409, + "epoch": 0.51, + "grad_norm": 0.793779886180151, + "learning_rate": 5.107438841816963e-06, + "loss": 0.4652, "step": 7164 }, { - "epoch": 0.75, - "grad_norm": 2.4494465926620497, - "learning_rate": 1.5053705233726228e-06, - "loss": 0.6402, + "epoch": 0.51, + "grad_norm": 2.0311785186349547, + "learning_rate": 5.1062899373524495e-06, + "loss": 0.5286, "step": 7165 }, { - "epoch": 0.75, - "grad_norm": 2.1863048410990804, - "learning_rate": 1.5041518653150777e-06, - "loss": 0.6386, + "epoch": 0.51, + "grad_norm": 1.7501828275635192, + "learning_rate": 5.105141027273344e-06, + "loss": 0.4693, "step": 7166 }, { - "epoch": 0.75, - "grad_norm": 2.2078949677416864, - "learning_rate": 1.5029336133950635e-06, - "loss": 0.5456, + "epoch": 0.51, + "grad_norm": 1.839859039851572, + "learning_rate": 5.103992111640331e-06, + "loss": 0.6239, "step": 7167 }, { - "epoch": 0.75, - "grad_norm": 3.738221773697226, - "learning_rate": 1.5017157677541144e-06, - "loss": 0.6146, + "epoch": 0.51, + "grad_norm": 1.5347476768920956, + "learning_rate": 5.102843190514104e-06, + "loss": 0.532, "step": 7168 }, { - "epoch": 0.75, - "grad_norm": 2.250673304439142, - "learning_rate": 1.500498328533715e-06, - "loss": 0.5573, + "epoch": 0.51, + "grad_norm": 1.4445105375159235, + "learning_rate": 5.101694263955349e-06, + "loss": 0.5094, "step": 7169 }, { - "epoch": 0.75, - "grad_norm": 2.5150783835660797, - "learning_rate": 1.4992812958753023e-06, - "loss": 0.5967, + "epoch": 0.51, + "grad_norm": 1.5152072381311248, + "learning_rate": 5.100545332024759e-06, + "loss": 0.5185, "step": 7170 }, { - "epoch": 0.75, - "grad_norm": 0.9829915284477175, - "learning_rate": 1.4980646699202684e-06, - "loss": 0.5587, + "epoch": 0.51, + "grad_norm": 1.7470586644169832, + "learning_rate": 5.099396394783024e-06, + "loss": 0.4763, "step": 7171 }, { - "epoch": 0.75, - "grad_norm": 2.1533475910931696, - "learning_rate": 1.4968484508099606e-06, - "loss": 0.6385, + "epoch": 0.51, + "grad_norm": 1.589972242158325, + "learning_rate": 5.098247452290834e-06, + "loss": 0.5458, "step": 7172 }, { - "epoch": 0.75, - "grad_norm": 5.851990435700211, - "learning_rate": 1.4956326386856723e-06, - "loss": 0.5869, + "epoch": 0.51, + "grad_norm": 1.5267982809836074, + "learning_rate": 5.097098504608882e-06, + "loss": 0.5617, "step": 7173 }, { - "epoch": 0.75, - "grad_norm": 2.7719426441646275, - "learning_rate": 1.494417233688653e-06, - "loss": 0.5874, + "epoch": 0.51, + "grad_norm": 1.7781590189794554, + "learning_rate": 5.095949551797859e-06, + "loss": 0.5255, "step": 7174 }, { - "epoch": 0.76, - "grad_norm": 3.102882961445121, - "learning_rate": 1.4932022359601056e-06, - "loss": 0.6421, + "epoch": 0.51, + "grad_norm": 1.7461475656270056, + "learning_rate": 5.094800593918454e-06, + "loss": 0.4842, "step": 7175 }, { - "epoch": 0.76, - "grad_norm": 2.6384157210222536, - "learning_rate": 1.4919876456411875e-06, - "loss": 0.5523, + "epoch": 0.51, + "grad_norm": 1.7348068432501793, + "learning_rate": 5.09365163103136e-06, + "loss": 0.4883, "step": 7176 }, { - "epoch": 0.76, - "grad_norm": 2.378591334938083, - "learning_rate": 1.490773462873002e-06, - "loss": 0.544, + "epoch": 0.51, + "grad_norm": 1.6683263207217836, + "learning_rate": 5.09250266319727e-06, + "loss": 0.5545, "step": 7177 }, { - "epoch": 0.76, - "grad_norm": 20.641360117403863, - "learning_rate": 1.4895596877966128e-06, - "loss": 0.636, + "epoch": 0.51, + "grad_norm": 1.5183367623448456, + "learning_rate": 5.091353690476876e-06, + "loss": 0.5222, "step": 7178 }, { - "epoch": 0.76, - "grad_norm": 2.4236725068487304, - "learning_rate": 1.4883463205530302e-06, - "loss": 0.6411, + "epoch": 0.51, + "grad_norm": 2.199029254173726, + "learning_rate": 5.090204712930871e-06, + "loss": 0.598, "step": 7179 }, { - "epoch": 0.76, - "grad_norm": 2.2183036139750714, - "learning_rate": 1.4871333612832206e-06, - "loss": 0.5712, + "epoch": 0.51, + "grad_norm": 1.7310512453946842, + "learning_rate": 5.089055730619946e-06, + "loss": 0.5276, "step": 7180 }, { - "epoch": 0.76, - "grad_norm": 3.050796166901124, - "learning_rate": 1.4859208101281041e-06, - "loss": 0.6366, + "epoch": 0.51, + "grad_norm": 0.8464871528648609, + "learning_rate": 5.087906743604797e-06, + "loss": 0.4356, "step": 7181 }, { - "epoch": 0.76, - "grad_norm": 2.4152995939569166, - "learning_rate": 1.48470866722855e-06, - "loss": 0.5498, + "epoch": 0.51, + "grad_norm": 1.8354474276969215, + "learning_rate": 5.086757751946115e-06, + "loss": 0.5648, "step": 7182 }, { - "epoch": 0.76, - "grad_norm": 2.362918250869478, - "learning_rate": 1.4834969327253795e-06, - "loss": 0.5972, + "epoch": 0.51, + "grad_norm": 1.6984935815932878, + "learning_rate": 5.0856087557045965e-06, + "loss": 0.4851, "step": 7183 }, { - "epoch": 0.76, - "grad_norm": 2.0541545853510184, - "learning_rate": 1.48228560675937e-06, + "epoch": 0.51, + "grad_norm": 1.722918994163249, + "learning_rate": 5.084459754940931e-06, "loss": 0.5475, "step": 7184 }, { - "epoch": 0.76, - "grad_norm": 4.0605689354251995, - "learning_rate": 1.481074689471252e-06, - "loss": 0.5439, + "epoch": 0.51, + "grad_norm": 1.9591364963612192, + "learning_rate": 5.083310749715815e-06, + "loss": 0.5988, "step": 7185 }, { - "epoch": 0.76, - "grad_norm": 2.1804727796434964, - "learning_rate": 1.479864181001704e-06, - "loss": 0.6155, + "epoch": 0.51, + "grad_norm": 2.0784154677264985, + "learning_rate": 5.082161740089944e-06, + "loss": 0.5634, "step": 7186 }, { - "epoch": 0.76, - "grad_norm": 4.353882130124197, - "learning_rate": 1.4786540814913586e-06, - "loss": 0.6792, + "epoch": 0.51, + "grad_norm": 1.496673241833887, + "learning_rate": 5.081012726124011e-06, + "loss": 0.5965, "step": 7187 }, { - "epoch": 0.76, - "grad_norm": 2.4609607660129162, - "learning_rate": 1.4774443910808023e-06, - "loss": 0.6776, + "epoch": 0.51, + "grad_norm": 2.2261573418980114, + "learning_rate": 5.079863707878712e-06, + "loss": 0.5102, "step": 7188 }, { - "epoch": 0.76, - "grad_norm": 2.6666982763784812, - "learning_rate": 1.476235109910576e-06, - "loss": 0.6191, + "epoch": 0.51, + "grad_norm": 2.1496305653936076, + "learning_rate": 5.07871468541474e-06, + "loss": 0.5347, "step": 7189 }, { - "epoch": 0.76, - "grad_norm": 3.56167731467775, - "learning_rate": 1.4750262381211665e-06, - "loss": 0.5887, + "epoch": 0.51, + "grad_norm": 0.7681969251651329, + "learning_rate": 5.077565658792793e-06, + "loss": 0.4736, "step": 7190 }, { - "epoch": 0.76, - "grad_norm": 2.44278557698414, - "learning_rate": 1.4738177758530208e-06, - "loss": 0.6259, + "epoch": 0.51, + "grad_norm": 2.158904767833304, + "learning_rate": 5.076416628073565e-06, + "loss": 0.5321, "step": 7191 }, { - "epoch": 0.76, - "grad_norm": 2.5250241812484906, - "learning_rate": 1.4726097232465314e-06, - "loss": 0.6089, + "epoch": 0.51, + "grad_norm": 1.6847662975240523, + "learning_rate": 5.075267593317751e-06, + "loss": 0.5188, "step": 7192 }, { - "epoch": 0.76, - "grad_norm": 3.2400336695589744, - "learning_rate": 1.471402080442047e-06, - "loss": 0.5666, + "epoch": 0.51, + "grad_norm": 1.3552674680186045, + "learning_rate": 5.074118554586047e-06, + "loss": 0.4671, "step": 7193 }, { - "epoch": 0.76, - "grad_norm": 2.7151462386306657, - "learning_rate": 1.470194847579871e-06, - "loss": 0.6301, + "epoch": 0.51, + "grad_norm": 2.151352171650516, + "learning_rate": 5.072969511939152e-06, + "loss": 0.5051, "step": 7194 }, { - "epoch": 0.76, - "grad_norm": 2.698877450197644, - "learning_rate": 1.4689880248002537e-06, - "loss": 0.7084, + "epoch": 0.51, + "grad_norm": 1.954993407580758, + "learning_rate": 5.071820465437759e-06, + "loss": 0.5974, "step": 7195 }, { - "epoch": 0.76, - "grad_norm": 2.6410464096856665, - "learning_rate": 1.467781612243399e-06, - "loss": 0.6245, + "epoch": 0.51, + "grad_norm": 1.621000747261092, + "learning_rate": 5.070671415142566e-06, + "loss": 0.5398, "step": 7196 }, { - "epoch": 0.76, - "grad_norm": 2.612323546310781, - "learning_rate": 1.466575610049466e-06, - "loss": 0.6783, + "epoch": 0.51, + "grad_norm": 1.5758683918705338, + "learning_rate": 5.069522361114269e-06, + "loss": 0.5637, "step": 7197 }, { - "epoch": 0.76, - "grad_norm": 2.7178474296193564, - "learning_rate": 1.4653700183585663e-06, - "loss": 0.6122, + "epoch": 0.51, + "grad_norm": 1.5603712280776678, + "learning_rate": 5.068373303413566e-06, + "loss": 0.4902, "step": 7198 }, { - "epoch": 0.76, - "grad_norm": 3.512024045937146, - "learning_rate": 1.4641648373107598e-06, - "loss": 0.6147, + "epoch": 0.51, + "grad_norm": 1.7591020585422592, + "learning_rate": 5.067224242101151e-06, + "loss": 0.4751, "step": 7199 }, { - "epoch": 0.76, - "grad_norm": 2.0834058077279685, - "learning_rate": 1.4629600670460603e-06, - "loss": 0.5514, + "epoch": 0.51, + "grad_norm": 1.644581644456421, + "learning_rate": 5.066075177237726e-06, + "loss": 0.5653, "step": 7200 }, { - "epoch": 0.76, - "grad_norm": 2.216935664488474, - "learning_rate": 1.4617557077044352e-06, - "loss": 0.6227, + "epoch": 0.51, + "grad_norm": 2.0052449603081564, + "learning_rate": 5.0649261088839854e-06, + "loss": 0.5612, "step": 7201 }, { - "epoch": 0.76, - "grad_norm": 2.46166081455518, - "learning_rate": 1.4605517594258046e-06, - "loss": 0.5871, + "epoch": 0.51, + "grad_norm": 1.6038822523909237, + "learning_rate": 5.063777037100628e-06, + "loss": 0.4771, "step": 7202 }, { - "epoch": 0.76, - "grad_norm": 2.1948644668696375, - "learning_rate": 1.4593482223500406e-06, - "loss": 0.6353, + "epoch": 0.51, + "grad_norm": 1.79919749248377, + "learning_rate": 5.062627961948352e-06, + "loss": 0.5935, "step": 7203 }, { - "epoch": 0.76, - "grad_norm": 4.33193407924704, - "learning_rate": 1.4581450966169648e-06, - "loss": 0.6246, + "epoch": 0.51, + "grad_norm": 1.9810872651517457, + "learning_rate": 5.061478883487854e-06, + "loss": 0.4846, "step": 7204 }, { - "epoch": 0.76, - "grad_norm": 2.624255396763174, - "learning_rate": 1.4569423823663515e-06, - "loss": 0.6982, + "epoch": 0.51, + "grad_norm": 1.6822731765585872, + "learning_rate": 5.060329801779834e-06, + "loss": 0.5035, "step": 7205 }, { - "epoch": 0.76, - "grad_norm": 2.3813097303259028, - "learning_rate": 1.4557400797379306e-06, - "loss": 0.5593, + "epoch": 0.51, + "grad_norm": 1.906475803232001, + "learning_rate": 5.059180716884988e-06, + "loss": 0.5763, "step": 7206 }, { - "epoch": 0.76, - "grad_norm": 2.8170072633637067, - "learning_rate": 1.4545381888713833e-06, - "loss": 0.622, + "epoch": 0.51, + "grad_norm": 1.631273332509671, + "learning_rate": 5.058031628864017e-06, + "loss": 0.5189, "step": 7207 }, { - "epoch": 0.76, - "grad_norm": 1.0227331919960505, - "learning_rate": 1.45333670990634e-06, - "loss": 0.4883, + "epoch": 0.51, + "grad_norm": 1.5515088499171164, + "learning_rate": 5.056882537777619e-06, + "loss": 0.5259, "step": 7208 }, { - "epoch": 0.76, - "grad_norm": 2.9590588104321056, - "learning_rate": 1.452135642982384e-06, - "loss": 0.6183, + "epoch": 0.51, + "grad_norm": 1.557540216053646, + "learning_rate": 5.055733443686492e-06, + "loss": 0.5263, "step": 7209 }, { - "epoch": 0.76, - "grad_norm": 2.33117855582943, - "learning_rate": 1.450934988239054e-06, - "loss": 0.6134, + "epoch": 0.51, + "grad_norm": 1.5962054424753847, + "learning_rate": 5.054584346651336e-06, + "loss": 0.587, "step": 7210 }, { - "epoch": 0.76, - "grad_norm": 2.4557426055942786, - "learning_rate": 1.4497347458158384e-06, - "loss": 0.62, + "epoch": 0.51, + "grad_norm": 1.7644523494004751, + "learning_rate": 5.0534352467328494e-06, + "loss": 0.5303, "step": 7211 }, { - "epoch": 0.76, - "grad_norm": 2.926982164970039, - "learning_rate": 1.4485349158521756e-06, - "loss": 0.6036, + "epoch": 0.51, + "grad_norm": 1.6920811478208428, + "learning_rate": 5.0522861439917326e-06, + "loss": 0.5787, "step": 7212 }, { - "epoch": 0.76, - "grad_norm": 2.2092246121938164, - "learning_rate": 1.447335498487462e-06, - "loss": 0.6053, + "epoch": 0.51, + "grad_norm": 1.5514690733654695, + "learning_rate": 5.0511370384886835e-06, + "loss": 0.5209, "step": 7213 }, { - "epoch": 0.76, - "grad_norm": 2.4017310344462994, - "learning_rate": 1.446136493861039e-06, - "loss": 0.6659, + "epoch": 0.51, + "grad_norm": 1.6473977802266926, + "learning_rate": 5.049987930284403e-06, + "loss": 0.5153, "step": 7214 }, { - "epoch": 0.76, - "grad_norm": 2.1199053957284346, - "learning_rate": 1.4449379021122045e-06, - "loss": 0.6202, + "epoch": 0.51, + "grad_norm": 1.7444379297982122, + "learning_rate": 5.0488388194395925e-06, + "loss": 0.5596, "step": 7215 }, { - "epoch": 0.76, - "grad_norm": 2.3367095552631816, - "learning_rate": 1.4437397233802098e-06, - "loss": 0.629, + "epoch": 0.51, + "grad_norm": 1.587468208291168, + "learning_rate": 5.047689706014947e-06, + "loss": 0.5033, "step": 7216 }, { - "epoch": 0.76, - "grad_norm": 3.0011471861240357, - "learning_rate": 1.4425419578042538e-06, - "loss": 0.6302, + "epoch": 0.51, + "grad_norm": 1.7695429034238872, + "learning_rate": 5.046540590071173e-06, + "loss": 0.6132, "step": 7217 }, { - "epoch": 0.76, - "grad_norm": 2.595981973547213, - "learning_rate": 1.4413446055234882e-06, - "loss": 0.6064, + "epoch": 0.51, + "grad_norm": 1.843576083449908, + "learning_rate": 5.045391471668965e-06, + "loss": 0.5495, "step": 7218 }, { - "epoch": 0.76, - "grad_norm": 3.0100131890890243, - "learning_rate": 1.4401476666770191e-06, - "loss": 0.6153, + "epoch": 0.51, + "grad_norm": 0.705654737869598, + "learning_rate": 5.044242350869025e-06, + "loss": 0.4269, "step": 7219 }, { - "epoch": 0.76, - "grad_norm": 2.068773777984486, - "learning_rate": 1.4389511414039053e-06, - "loss": 0.5228, + "epoch": 0.51, + "grad_norm": 1.7192155706036898, + "learning_rate": 5.043093227732057e-06, + "loss": 0.5318, "step": 7220 }, { - "epoch": 0.76, - "grad_norm": 2.183378387130152, - "learning_rate": 1.4377550298431536e-06, - "loss": 0.6687, + "epoch": 0.51, + "grad_norm": 3.930354376564211, + "learning_rate": 5.041944102318756e-06, + "loss": 0.593, "step": 7221 }, { - "epoch": 0.76, - "grad_norm": 2.8674171438993237, - "learning_rate": 1.436559332133724e-06, - "loss": 0.6141, + "epoch": 0.51, + "grad_norm": 1.7292720597457205, + "learning_rate": 5.040794974689827e-06, + "loss": 0.548, "step": 7222 }, { - "epoch": 0.76, - "grad_norm": 2.8843731419145637, - "learning_rate": 1.4353640484145304e-06, - "loss": 0.5749, + "epoch": 0.51, + "grad_norm": 1.5109477056916976, + "learning_rate": 5.039645844905967e-06, + "loss": 0.5379, "step": 7223 }, { - "epoch": 0.76, - "grad_norm": 2.27701809442643, - "learning_rate": 1.4341691788244383e-06, - "loss": 0.6422, + "epoch": 0.51, + "grad_norm": 0.8638070213950221, + "learning_rate": 5.038496713027881e-06, + "loss": 0.4608, "step": 7224 }, { - "epoch": 0.76, - "grad_norm": 2.5322604425389112, - "learning_rate": 1.4329747235022624e-06, - "loss": 0.5397, + "epoch": 0.51, + "grad_norm": 1.749235122769125, + "learning_rate": 5.037347579116269e-06, + "loss": 0.5315, "step": 7225 }, { - "epoch": 0.76, - "grad_norm": 2.431501916595479, - "learning_rate": 1.431780682586773e-06, - "loss": 0.6515, + "epoch": 0.51, + "grad_norm": 0.7663640692608794, + "learning_rate": 5.036198443231831e-06, + "loss": 0.4755, "step": 7226 }, { - "epoch": 0.76, - "grad_norm": 2.3394101864352983, - "learning_rate": 1.4305870562166873e-06, - "loss": 0.6069, + "epoch": 0.51, + "grad_norm": 1.806818927661662, + "learning_rate": 5.035049305435267e-06, + "loss": 0.6063, "step": 7227 }, { - "epoch": 0.76, - "grad_norm": 2.2242293964368156, - "learning_rate": 1.4293938445306798e-06, - "loss": 0.5442, + "epoch": 0.51, + "grad_norm": 0.7573698531441196, + "learning_rate": 5.033900165787281e-06, + "loss": 0.4373, "step": 7228 }, { - "epoch": 0.76, - "grad_norm": 2.5833298123991604, - "learning_rate": 1.4282010476673758e-06, - "loss": 0.6004, + "epoch": 0.51, + "grad_norm": 1.8455329651365062, + "learning_rate": 5.032751024348572e-06, + "loss": 0.5774, "step": 7229 }, { - "epoch": 0.76, - "grad_norm": 2.6359655886818913, - "learning_rate": 1.427008665765348e-06, - "loss": 0.6834, + "epoch": 0.51, + "grad_norm": 1.7822989499076083, + "learning_rate": 5.031601881179845e-06, + "loss": 0.5019, "step": 7230 }, { - "epoch": 0.76, - "grad_norm": 2.585971432337667, - "learning_rate": 1.4258166989631245e-06, - "loss": 0.6375, + "epoch": 0.51, + "grad_norm": 1.5448972505936522, + "learning_rate": 5.030452736341799e-06, + "loss": 0.5072, "step": 7231 }, { - "epoch": 0.76, - "grad_norm": 3.257480641444835, - "learning_rate": 1.4246251473991845e-06, - "loss": 0.6148, + "epoch": 0.51, + "grad_norm": 1.6595951684377732, + "learning_rate": 5.029303589895136e-06, + "loss": 0.5372, "step": 7232 }, { - "epoch": 0.76, - "grad_norm": 2.6802149842143397, - "learning_rate": 1.423434011211961e-06, - "loss": 0.6633, + "epoch": 0.51, + "grad_norm": 2.152031855143544, + "learning_rate": 5.028154441900558e-06, + "loss": 0.5201, "step": 7233 }, { - "epoch": 0.76, - "grad_norm": 2.460163635030121, - "learning_rate": 1.4222432905398353e-06, - "loss": 0.5807, + "epoch": 0.51, + "grad_norm": 3.1936899109439905, + "learning_rate": 5.027005292418766e-06, + "loss": 0.5086, "step": 7234 }, { - "epoch": 0.76, - "grad_norm": 2.22647375488727, - "learning_rate": 1.4210529855211403e-06, - "loss": 0.4943, + "epoch": 0.51, + "grad_norm": 1.563384468087144, + "learning_rate": 5.025856141510465e-06, + "loss": 0.5033, "step": 7235 }, { - "epoch": 0.76, - "grad_norm": 4.121150139420541, - "learning_rate": 1.4198630962941639e-06, - "loss": 0.6249, + "epoch": 0.51, + "grad_norm": 2.04675847653485, + "learning_rate": 5.024706989236353e-06, + "loss": 0.5715, "step": 7236 }, { - "epoch": 0.76, - "grad_norm": 2.946918461716052, - "learning_rate": 1.4186736229971455e-06, - "loss": 0.6211, + "epoch": 0.51, + "grad_norm": 1.8693857345891904, + "learning_rate": 5.023557835657134e-06, + "loss": 0.5849, "step": 7237 }, { - "epoch": 0.76, - "grad_norm": 4.528975031658922, - "learning_rate": 1.4174845657682712e-06, - "loss": 0.6419, + "epoch": 0.51, + "grad_norm": 1.7593542749232354, + "learning_rate": 5.0224086808335115e-06, + "loss": 0.5149, "step": 7238 }, { - "epoch": 0.76, - "grad_norm": 2.5787193861812496, - "learning_rate": 1.4162959247456854e-06, - "loss": 0.6206, + "epoch": 0.51, + "grad_norm": 0.75551147779967, + "learning_rate": 5.021259524826187e-06, + "loss": 0.4101, "step": 7239 }, { - "epoch": 0.76, - "grad_norm": 3.406544835370247, - "learning_rate": 1.4151077000674784e-06, - "loss": 0.624, + "epoch": 0.51, + "grad_norm": 1.7572922825046093, + "learning_rate": 5.020110367695862e-06, + "loss": 0.5475, "step": 7240 }, { - "epoch": 0.76, - "grad_norm": 4.207698162326682, - "learning_rate": 1.4139198918716956e-06, - "loss": 0.5709, + "epoch": 0.51, + "grad_norm": 1.6726415173378884, + "learning_rate": 5.018961209503239e-06, + "loss": 0.4617, "step": 7241 }, { - "epoch": 0.76, - "grad_norm": 2.2463252191495924, - "learning_rate": 1.4127325002963355e-06, - "loss": 0.5574, + "epoch": 0.51, + "grad_norm": 1.603224859491169, + "learning_rate": 5.017812050309021e-06, + "loss": 0.5432, "step": 7242 }, { - "epoch": 0.76, - "grad_norm": 2.3509763553384597, - "learning_rate": 1.4115455254793436e-06, - "loss": 0.5806, + "epoch": 0.51, + "grad_norm": 1.5848769110086018, + "learning_rate": 5.016662890173909e-06, + "loss": 0.5071, "step": 7243 }, { - "epoch": 0.76, - "grad_norm": 2.52195463891043, - "learning_rate": 1.4103589675586176e-06, - "loss": 0.6281, + "epoch": 0.51, + "grad_norm": 1.7082490889526456, + "learning_rate": 5.015513729158608e-06, + "loss": 0.5359, "step": 7244 }, { - "epoch": 0.76, - "grad_norm": 2.6057177351757734, - "learning_rate": 1.4091728266720106e-06, - "loss": 0.5374, + "epoch": 0.51, + "grad_norm": 2.483721540952498, + "learning_rate": 5.014364567323819e-06, + "loss": 0.5698, "step": 7245 }, { - "epoch": 0.76, - "grad_norm": 2.378426724174707, - "learning_rate": 1.4079871029573254e-06, - "loss": 0.5312, + "epoch": 0.51, + "grad_norm": 2.22394592421172, + "learning_rate": 5.013215404730244e-06, + "loss": 0.5088, "step": 7246 }, { - "epoch": 0.76, - "grad_norm": 2.4658224720117308, - "learning_rate": 1.4068017965523146e-06, - "loss": 0.6043, + "epoch": 0.51, + "grad_norm": 2.159977920143412, + "learning_rate": 5.012066241438588e-06, + "loss": 0.4958, "step": 7247 }, { - "epoch": 0.76, - "grad_norm": 2.367796137825628, - "learning_rate": 1.4056169075946846e-06, - "loss": 0.5532, + "epoch": 0.51, + "grad_norm": 3.2717837002259853, + "learning_rate": 5.01091707750955e-06, + "loss": 0.4975, "step": 7248 }, { - "epoch": 0.76, - "grad_norm": 2.4057618387618436, - "learning_rate": 1.4044324362220912e-06, - "loss": 0.608, + "epoch": 0.51, + "grad_norm": 0.8625731917922311, + "learning_rate": 5.009767913003838e-06, + "loss": 0.4315, "step": 7249 }, { - "epoch": 0.76, - "grad_norm": 3.7601186688014003, - "learning_rate": 1.4032483825721432e-06, - "loss": 0.6427, + "epoch": 0.51, + "grad_norm": 1.8296157089392737, + "learning_rate": 5.008618747982149e-06, + "loss": 0.5825, "step": 7250 }, { - "epoch": 0.76, - "grad_norm": 2.5498571521744045, - "learning_rate": 1.4020647467824028e-06, - "loss": 0.6887, + "epoch": 0.51, + "grad_norm": 0.6978983175685524, + "learning_rate": 5.007469582505188e-06, + "loss": 0.4323, "step": 7251 }, { - "epoch": 0.76, - "grad_norm": 0.9344224790730087, - "learning_rate": 1.4008815289903798e-06, - "loss": 0.5365, + "epoch": 0.51, + "grad_norm": 2.28314763289816, + "learning_rate": 5.006320416633661e-06, + "loss": 0.4997, "step": 7252 }, { - "epoch": 0.76, - "grad_norm": 2.219760527227203, - "learning_rate": 1.3996987293335345e-06, - "loss": 0.6433, + "epoch": 0.51, + "grad_norm": 1.9671083190596543, + "learning_rate": 5.005171250428267e-06, + "loss": 0.5827, "step": 7253 }, { - "epoch": 0.76, - "grad_norm": 8.310538446761164, - "learning_rate": 1.3985163479492842e-06, - "loss": 0.6225, + "epoch": 0.51, + "grad_norm": 0.7045128805807999, + "learning_rate": 5.004022083949711e-06, + "loss": 0.4379, "step": 7254 }, { - "epoch": 0.76, - "grad_norm": 2.8517117856482517, - "learning_rate": 1.3973343849749948e-06, - "loss": 0.6371, + "epoch": 0.51, + "grad_norm": 1.7271037491130197, + "learning_rate": 5.002872917258693e-06, + "loss": 0.5398, "step": 7255 }, { - "epoch": 0.76, - "grad_norm": 2.661140852133978, - "learning_rate": 1.3961528405479824e-06, - "loss": 0.6281, + "epoch": 0.51, + "grad_norm": 1.641112828116606, + "learning_rate": 5.00172375041592e-06, + "loss": 0.5336, "step": 7256 }, { - "epoch": 0.76, - "grad_norm": 2.2805811042491286, - "learning_rate": 1.3949717148055136e-06, - "loss": 0.5957, + "epoch": 0.51, + "grad_norm": 2.784381814528962, + "learning_rate": 5.00057458348209e-06, + "loss": 0.5788, "step": 7257 }, { - "epoch": 0.76, - "grad_norm": 3.300483005629338, - "learning_rate": 1.39379100788481e-06, - "loss": 0.6551, + "epoch": 0.52, + "grad_norm": 1.5072402623407863, + "learning_rate": 4.9994254165179105e-06, + "loss": 0.56, "step": 7258 }, { - "epoch": 0.76, - "grad_norm": 2.485309336234544, - "learning_rate": 1.3926107199230442e-06, - "loss": 0.6081, + "epoch": 0.52, + "grad_norm": 1.5348676725488957, + "learning_rate": 4.998276249584082e-06, + "loss": 0.6639, "step": 7259 }, { - "epoch": 0.76, - "grad_norm": 8.341174494875231, - "learning_rate": 1.3914308510573354e-06, - "loss": 0.6515, + "epoch": 0.52, + "grad_norm": 1.7916397577932328, + "learning_rate": 4.997127082741307e-06, + "loss": 0.4294, "step": 7260 }, { - "epoch": 0.76, - "grad_norm": 2.8732963511207283, - "learning_rate": 1.3902514014247608e-06, - "loss": 0.5541, + "epoch": 0.52, + "grad_norm": 1.5645671543729323, + "learning_rate": 4.995977916050291e-06, + "loss": 0.484, "step": 7261 }, { - "epoch": 0.76, - "grad_norm": 2.2043168478650896, - "learning_rate": 1.3890723711623421e-06, - "loss": 0.588, + "epoch": 0.52, + "grad_norm": 1.7220891046244426, + "learning_rate": 4.9948287495717345e-06, + "loss": 0.4791, "step": 7262 }, { - "epoch": 0.76, - "grad_norm": 2.853557533816863, - "learning_rate": 1.3878937604070568e-06, - "loss": 0.5689, + "epoch": 0.52, + "grad_norm": 1.499614914639197, + "learning_rate": 4.993679583366341e-06, + "loss": 0.5255, "step": 7263 }, { - "epoch": 0.76, - "grad_norm": 2.8253444135108845, - "learning_rate": 1.386715569295835e-06, - "loss": 0.5745, + "epoch": 0.52, + "grad_norm": 1.7632920332517494, + "learning_rate": 4.992530417494812e-06, + "loss": 0.4827, "step": 7264 }, { - "epoch": 0.76, - "grad_norm": 0.9796152518485929, - "learning_rate": 1.3855377979655533e-06, - "loss": 0.5749, + "epoch": 0.52, + "grad_norm": 2.055270365578761, + "learning_rate": 4.9913812520178524e-06, + "loss": 0.5338, "step": 7265 }, { - "epoch": 0.76, - "grad_norm": 3.6160882171024125, - "learning_rate": 1.3843604465530398e-06, - "loss": 0.5622, + "epoch": 0.52, + "grad_norm": 1.9181685268858486, + "learning_rate": 4.990232086996165e-06, + "loss": 0.5925, "step": 7266 }, { - "epoch": 0.76, - "grad_norm": 2.0041115107011476, - "learning_rate": 1.3831835151950784e-06, - "loss": 0.5528, + "epoch": 0.52, + "grad_norm": 2.052652317873827, + "learning_rate": 4.98908292249045e-06, + "loss": 0.5841, "step": 7267 }, { - "epoch": 0.76, - "grad_norm": 2.16044720788834, - "learning_rate": 1.3820070040284023e-06, - "loss": 0.6886, + "epoch": 0.52, + "grad_norm": 1.5402671363346916, + "learning_rate": 4.987933758561414e-06, + "loss": 0.5503, "step": 7268 }, { - "epoch": 0.76, - "grad_norm": 2.8645110276055927, - "learning_rate": 1.380830913189694e-06, - "loss": 0.6323, + "epoch": 0.52, + "grad_norm": 1.959775331612989, + "learning_rate": 4.986784595269758e-06, + "loss": 0.5187, "step": 7269 }, { - "epoch": 0.77, - "grad_norm": 2.0345813564664548, - "learning_rate": 1.3796552428155868e-06, - "loss": 0.5463, + "epoch": 0.52, + "grad_norm": 1.735648065561153, + "learning_rate": 4.985635432676184e-06, + "loss": 0.5624, "step": 7270 }, { - "epoch": 0.77, - "grad_norm": 2.4861411344231317, - "learning_rate": 1.378479993042668e-06, - "loss": 0.6813, + "epoch": 0.52, + "grad_norm": 1.87712740782785, + "learning_rate": 4.984486270841393e-06, + "loss": 0.4823, "step": 7271 }, { - "epoch": 0.77, - "grad_norm": 2.5548781593021594, - "learning_rate": 1.3773051640074764e-06, - "loss": 0.6686, + "epoch": 0.52, + "grad_norm": 0.7293366666424574, + "learning_rate": 4.983337109826093e-06, + "loss": 0.4563, "step": 7272 }, { - "epoch": 0.77, - "grad_norm": 2.7242847584411165, - "learning_rate": 1.3761307558464975e-06, - "loss": 0.635, + "epoch": 0.52, + "grad_norm": 0.7133623007553, + "learning_rate": 4.982187949690981e-06, + "loss": 0.4277, "step": 7273 }, { - "epoch": 0.77, - "grad_norm": 4.079673277666802, - "learning_rate": 1.3749567686961728e-06, - "loss": 0.6109, + "epoch": 0.52, + "grad_norm": 1.7270411304845135, + "learning_rate": 4.981038790496763e-06, + "loss": 0.5562, "step": 7274 }, { - "epoch": 0.77, - "grad_norm": 0.9119319275996398, - "learning_rate": 1.3737832026928905e-06, - "loss": 0.5083, + "epoch": 0.52, + "grad_norm": 1.764351877262511, + "learning_rate": 4.979889632304138e-06, + "loss": 0.5697, "step": 7275 }, { - "epoch": 0.77, - "grad_norm": 2.645571565188374, - "learning_rate": 1.3726100579729935e-06, - "loss": 0.6791, + "epoch": 0.52, + "grad_norm": 1.4504232648468505, + "learning_rate": 4.9787404751738135e-06, + "loss": 0.4977, "step": 7276 }, { - "epoch": 0.77, - "grad_norm": 2.280058640830768, - "learning_rate": 1.3714373346727754e-06, - "loss": 0.6213, + "epoch": 0.52, + "grad_norm": 0.8048429391730955, + "learning_rate": 4.97759131916649e-06, + "loss": 0.4318, "step": 7277 }, { - "epoch": 0.77, - "grad_norm": 2.3684177527062755, - "learning_rate": 1.3702650329284794e-06, - "loss": 0.6239, + "epoch": 0.52, + "grad_norm": 1.842523802109378, + "learning_rate": 4.9764421643428675e-06, + "loss": 0.5527, "step": 7278 }, { - "epoch": 0.77, - "grad_norm": 3.5713000588210955, - "learning_rate": 1.3690931528762974e-06, - "loss": 0.6513, + "epoch": 0.52, + "grad_norm": 1.5425789839625967, + "learning_rate": 4.975293010763648e-06, + "loss": 0.5084, "step": 7279 }, { - "epoch": 0.77, - "grad_norm": 2.191606619177731, - "learning_rate": 1.3679216946523771e-06, - "loss": 0.6179, + "epoch": 0.52, + "grad_norm": 1.8472122702634888, + "learning_rate": 4.974143858489537e-06, + "loss": 0.5103, "step": 7280 }, { - "epoch": 0.77, - "grad_norm": 2.520087458009411, - "learning_rate": 1.3667506583928163e-06, - "loss": 0.6273, + "epoch": 0.52, + "grad_norm": 0.7591169982563802, + "learning_rate": 4.972994707581235e-06, + "loss": 0.445, "step": 7281 }, { - "epoch": 0.77, - "grad_norm": 2.8211254894412083, - "learning_rate": 1.3655800442336597e-06, - "loss": 0.6629, + "epoch": 0.52, + "grad_norm": 1.819434752435671, + "learning_rate": 4.971845558099443e-06, + "loss": 0.6028, "step": 7282 }, { - "epoch": 0.77, - "grad_norm": 2.5218498030614986, - "learning_rate": 1.3644098523109096e-06, - "loss": 0.5631, + "epoch": 0.52, + "grad_norm": 1.6563495002598083, + "learning_rate": 4.970696410104865e-06, + "loss": 0.569, "step": 7283 }, { - "epoch": 0.77, - "grad_norm": 2.187549795477403, - "learning_rate": 1.3632400827605113e-06, - "loss": 0.552, + "epoch": 0.52, + "grad_norm": 1.9422865757037184, + "learning_rate": 4.9695472636582035e-06, + "loss": 0.5756, "step": 7284 }, { - "epoch": 0.77, - "grad_norm": 4.067246657340486, - "learning_rate": 1.3620707357183694e-06, - "loss": 0.6166, + "epoch": 0.52, + "grad_norm": 1.7093473061284465, + "learning_rate": 4.968398118820157e-06, + "loss": 0.5193, "step": 7285 }, { - "epoch": 0.77, - "grad_norm": 15.55712958329288, - "learning_rate": 1.3609018113203314e-06, - "loss": 0.6316, + "epoch": 0.52, + "grad_norm": 1.7519770391294718, + "learning_rate": 4.9672489756514284e-06, + "loss": 0.6103, "step": 7286 }, { - "epoch": 0.77, - "grad_norm": 2.5699486322531566, - "learning_rate": 1.3597333097022031e-06, - "loss": 0.6579, + "epoch": 0.52, + "grad_norm": 1.5457193395840516, + "learning_rate": 4.966099834212721e-06, + "loss": 0.5481, "step": 7287 }, { - "epoch": 0.77, - "grad_norm": 2.606631331064434, - "learning_rate": 1.3585652309997344e-06, - "loss": 0.6924, + "epoch": 0.52, + "grad_norm": 1.8762245569098595, + "learning_rate": 4.964950694564734e-06, + "loss": 0.555, "step": 7288 }, { - "epoch": 0.77, - "grad_norm": 2.4639655618590646, - "learning_rate": 1.3573975753486313e-06, - "loss": 0.6486, + "epoch": 0.52, + "grad_norm": 1.4737133075298772, + "learning_rate": 4.963801556768172e-06, + "loss": 0.5457, "step": 7289 }, { - "epoch": 0.77, - "grad_norm": 3.494505254711063, - "learning_rate": 1.356230342884549e-06, - "loss": 0.6138, + "epoch": 0.52, + "grad_norm": 1.6450680366116832, + "learning_rate": 4.962652420883732e-06, + "loss": 0.5311, "step": 7290 }, { - "epoch": 0.77, - "grad_norm": 2.6073670766020154, - "learning_rate": 1.3550635337430928e-06, - "loss": 0.5951, + "epoch": 0.52, + "grad_norm": 1.5037485486985844, + "learning_rate": 4.9615032869721195e-06, + "loss": 0.5117, "step": 7291 }, { - "epoch": 0.77, - "grad_norm": 3.2580430689462734, - "learning_rate": 1.3538971480598167e-06, - "loss": 0.6027, + "epoch": 0.52, + "grad_norm": 1.903523040455951, + "learning_rate": 4.960354155094034e-06, + "loss": 0.5751, "step": 7292 }, { - "epoch": 0.77, - "grad_norm": 0.9517406444625436, - "learning_rate": 1.3527311859702308e-06, - "loss": 0.5322, + "epoch": 0.52, + "grad_norm": 1.8904026770030742, + "learning_rate": 4.959205025310175e-06, + "loss": 0.4847, "step": 7293 }, { - "epoch": 0.77, - "grad_norm": 2.860206476602887, - "learning_rate": 1.3515656476097937e-06, - "loss": 0.5832, + "epoch": 0.52, + "grad_norm": 2.80369450954564, + "learning_rate": 4.9580558976812445e-06, + "loss": 0.5236, "step": 7294 }, { - "epoch": 0.77, - "grad_norm": 4.9771138110934405, - "learning_rate": 1.350400533113912e-06, - "loss": 0.543, + "epoch": 0.52, + "grad_norm": 1.5086448198287172, + "learning_rate": 4.956906772267945e-06, + "loss": 0.496, "step": 7295 }, { - "epoch": 0.77, - "grad_norm": 2.582876236962234, - "learning_rate": 1.3492358426179475e-06, - "loss": 0.6746, + "epoch": 0.52, + "grad_norm": 0.7517681557055905, + "learning_rate": 4.955757649130976e-06, + "loss": 0.4372, "step": 7296 }, { - "epoch": 0.77, - "grad_norm": 2.0636423377041937, - "learning_rate": 1.3480715762572078e-06, - "loss": 0.657, + "epoch": 0.52, + "grad_norm": 0.6965478616827675, + "learning_rate": 4.954608528331038e-06, + "loss": 0.4687, "step": 7297 }, { - "epoch": 0.77, - "grad_norm": 2.220396192129797, - "learning_rate": 1.3469077341669579e-06, - "loss": 0.6404, + "epoch": 0.52, + "grad_norm": 1.486501053378748, + "learning_rate": 4.953459409928829e-06, + "loss": 0.5254, "step": 7298 }, { - "epoch": 0.77, - "grad_norm": 2.8263887939999264, - "learning_rate": 1.3457443164824053e-06, - "loss": 0.6004, + "epoch": 0.52, + "grad_norm": 0.7619300355084733, + "learning_rate": 4.9523102939850535e-06, + "loss": 0.4197, "step": 7299 }, { - "epoch": 0.77, - "grad_norm": 2.70882407444034, - "learning_rate": 1.3445813233387167e-06, - "loss": 0.7198, + "epoch": 0.52, + "grad_norm": 2.4703459123713984, + "learning_rate": 4.951161180560411e-06, + "loss": 0.539, "step": 7300 }, { - "epoch": 0.77, - "grad_norm": 3.3517602576307355, - "learning_rate": 1.3434187548710014e-06, - "loss": 0.6133, + "epoch": 0.52, + "grad_norm": 1.5990817683088132, + "learning_rate": 4.950012069715598e-06, + "loss": 0.5942, "step": 7301 }, { - "epoch": 0.77, - "grad_norm": 2.8385464767154356, - "learning_rate": 1.3422566112143248e-06, - "loss": 0.527, + "epoch": 0.52, + "grad_norm": 1.4823258718291015, + "learning_rate": 4.948862961511318e-06, + "loss": 0.586, "step": 7302 }, { - "epoch": 0.77, - "grad_norm": 2.7137520222257563, - "learning_rate": 1.3410948925037037e-06, - "loss": 0.7291, + "epoch": 0.52, + "grad_norm": 1.6477803011147398, + "learning_rate": 4.947713856008269e-06, + "loss": 0.5404, "step": 7303 }, { - "epoch": 0.77, - "grad_norm": 2.238965834669248, - "learning_rate": 1.3399335988741007e-06, - "loss": 0.6578, + "epoch": 0.52, + "grad_norm": 0.7582317307299441, + "learning_rate": 4.946564753267153e-06, + "loss": 0.4442, "step": 7304 }, { - "epoch": 0.77, - "grad_norm": 0.9827149232414178, - "learning_rate": 1.338772730460431e-06, - "loss": 0.5438, + "epoch": 0.52, + "grad_norm": 2.056629385867051, + "learning_rate": 4.945415653348665e-06, + "loss": 0.6541, "step": 7305 }, { - "epoch": 0.77, - "grad_norm": 4.982687189717289, - "learning_rate": 1.3376122873975616e-06, - "loss": 0.5661, + "epoch": 0.52, + "grad_norm": 1.4548594594819615, + "learning_rate": 4.944266556313509e-06, + "loss": 0.4713, "step": 7306 }, { - "epoch": 0.77, - "grad_norm": 2.394229778749516, - "learning_rate": 1.3364522698203114e-06, - "loss": 0.5627, + "epoch": 0.52, + "grad_norm": 3.1494274175085297, + "learning_rate": 4.943117462222384e-06, + "loss": 0.5059, "step": 7307 }, { - "epoch": 0.77, - "grad_norm": 2.5676561438034358, - "learning_rate": 1.3352926778634446e-06, - "loss": 0.5664, + "epoch": 0.52, + "grad_norm": 1.796287612559202, + "learning_rate": 4.941968371135984e-06, + "loss": 0.4994, "step": 7308 }, { - "epoch": 0.77, - "grad_norm": 4.69224823273842, - "learning_rate": 1.3341335116616822e-06, - "loss": 0.5727, + "epoch": 0.52, + "grad_norm": 1.861721318817861, + "learning_rate": 4.940819283115013e-06, + "loss": 0.5518, "step": 7309 }, { - "epoch": 0.77, - "grad_norm": 2.8984898812420727, - "learning_rate": 1.3329747713496904e-06, - "loss": 0.6014, + "epoch": 0.52, + "grad_norm": 1.6850851880783693, + "learning_rate": 4.939670198220168e-06, + "loss": 0.5342, "step": 7310 }, { - "epoch": 0.77, - "grad_norm": 2.5133785385178657, - "learning_rate": 1.331816457062089e-06, - "loss": 0.6487, + "epoch": 0.52, + "grad_norm": 1.5351681440168856, + "learning_rate": 4.938521116512147e-06, + "loss": 0.5481, "step": 7311 }, { - "epoch": 0.77, - "grad_norm": 2.550922385813062, - "learning_rate": 1.3306585689334494e-06, - "loss": 0.605, + "epoch": 0.52, + "grad_norm": 0.7683130362519132, + "learning_rate": 4.93737203805165e-06, + "loss": 0.4502, "step": 7312 }, { - "epoch": 0.77, - "grad_norm": 2.6330342158074487, - "learning_rate": 1.3295011070982906e-06, - "loss": 0.5916, + "epoch": 0.52, + "grad_norm": 1.7135111792916076, + "learning_rate": 4.936222962899372e-06, + "loss": 0.4866, "step": 7313 }, { - "epoch": 0.77, - "grad_norm": 2.906335153096584, - "learning_rate": 1.3283440716910812e-06, - "loss": 0.6149, + "epoch": 0.52, + "grad_norm": 1.5765238638206085, + "learning_rate": 4.935073891116015e-06, + "loss": 0.5528, "step": 7314 }, { - "epoch": 0.77, - "grad_norm": 4.095159349488047, - "learning_rate": 1.327187462846244e-06, - "loss": 0.5979, + "epoch": 0.52, + "grad_norm": 1.3960977494701152, + "learning_rate": 4.933924822762276e-06, + "loss": 0.5427, "step": 7315 }, { - "epoch": 0.77, - "grad_norm": 2.8365869518598976, - "learning_rate": 1.3260312806981517e-06, - "loss": 0.4815, + "epoch": 0.52, + "grad_norm": 2.0418287895522167, + "learning_rate": 4.93277575789885e-06, + "loss": 0.5587, "step": 7316 }, { - "epoch": 0.77, - "grad_norm": 2.072771877951047, - "learning_rate": 1.3248755253811236e-06, - "loss": 0.5718, + "epoch": 0.52, + "grad_norm": 1.5025091497905536, + "learning_rate": 4.931626696586435e-06, + "loss": 0.492, "step": 7317 }, { - "epoch": 0.77, - "grad_norm": 3.5814898012658896, - "learning_rate": 1.3237201970294344e-06, - "loss": 0.5606, + "epoch": 0.52, + "grad_norm": 2.0128210261794472, + "learning_rate": 4.930477638885733e-06, + "loss": 0.5476, "step": 7318 }, { - "epoch": 0.77, - "grad_norm": 2.290088791951938, - "learning_rate": 1.3225652957773044e-06, - "loss": 0.6147, + "epoch": 0.52, + "grad_norm": 0.7240440208197867, + "learning_rate": 4.929328584857436e-06, + "loss": 0.4442, "step": 7319 }, { - "epoch": 0.77, - "grad_norm": 2.485404940783879, - "learning_rate": 1.3214108217589095e-06, - "loss": 0.5559, + "epoch": 0.52, + "grad_norm": 0.7699847052167357, + "learning_rate": 4.928179534562241e-06, + "loss": 0.4316, "step": 7320 }, { - "epoch": 0.77, - "grad_norm": 2.890814837354043, - "learning_rate": 1.3202567751083701e-06, - "loss": 0.5864, + "epoch": 0.52, + "grad_norm": 1.6586849891371507, + "learning_rate": 4.927030488060849e-06, + "loss": 0.5568, "step": 7321 }, { - "epoch": 0.77, - "grad_norm": 2.3304011547001933, - "learning_rate": 1.3191031559597628e-06, - "loss": 0.686, + "epoch": 0.52, + "grad_norm": 2.170569533833282, + "learning_rate": 4.9258814454139535e-06, + "loss": 0.5194, "step": 7322 }, { - "epoch": 0.77, - "grad_norm": 2.939817245282438, - "learning_rate": 1.3179499644471088e-06, - "loss": 0.7083, + "epoch": 0.52, + "grad_norm": 1.8493214802532791, + "learning_rate": 4.924732406682251e-06, + "loss": 0.468, "step": 7323 }, { - "epoch": 0.77, - "grad_norm": 1.9322448250042434, - "learning_rate": 1.3167972007043844e-06, - "loss": 0.617, + "epoch": 0.52, + "grad_norm": 0.7951036683305411, + "learning_rate": 4.923583371926436e-06, + "loss": 0.4565, "step": 7324 }, { - "epoch": 0.77, - "grad_norm": 2.2730836852993885, - "learning_rate": 1.3156448648655163e-06, - "loss": 0.6175, + "epoch": 0.52, + "grad_norm": 1.6236967319641786, + "learning_rate": 4.922434341207208e-06, + "loss": 0.5372, "step": 7325 }, { - "epoch": 0.77, - "grad_norm": 2.216691599440806, - "learning_rate": 1.3144929570643767e-06, - "loss": 0.5699, + "epoch": 0.52, + "grad_norm": 1.5171929123922037, + "learning_rate": 4.9212853145852605e-06, + "loss": 0.4953, "step": 7326 }, { - "epoch": 0.77, - "grad_norm": 2.2709102711271103, - "learning_rate": 1.3133414774347903e-06, - "loss": 0.6783, + "epoch": 0.52, + "grad_norm": 2.430018558229765, + "learning_rate": 4.9201362921212905e-06, + "loss": 0.5332, "step": 7327 }, { - "epoch": 0.77, - "grad_norm": 3.7008961073983926, - "learning_rate": 1.3121904261105339e-06, - "loss": 0.5866, + "epoch": 0.52, + "grad_norm": 1.7219510683218358, + "learning_rate": 4.918987273875989e-06, + "loss": 0.5116, "step": 7328 }, { - "epoch": 0.77, - "grad_norm": 1.0182432463584763, - "learning_rate": 1.3110398032253346e-06, - "loss": 0.5087, + "epoch": 0.52, + "grad_norm": 1.673881951175707, + "learning_rate": 4.917838259910058e-06, + "loss": 0.5809, "step": 7329 }, { - "epoch": 0.77, - "grad_norm": 1.0497274511645123, - "learning_rate": 1.3098896089128666e-06, - "loss": 0.5744, + "epoch": 0.52, + "grad_norm": 1.8155002702907637, + "learning_rate": 4.916689250284188e-06, + "loss": 0.5358, "step": 7330 }, { - "epoch": 0.77, - "grad_norm": 2.2533986618656225, - "learning_rate": 1.3087398433067577e-06, - "loss": 0.5694, + "epoch": 0.52, + "grad_norm": 1.932130852182361, + "learning_rate": 4.91554024505907e-06, + "loss": 0.5895, "step": 7331 }, { - "epoch": 0.77, - "grad_norm": 2.4413558422766193, - "learning_rate": 1.307590506540582e-06, - "loss": 0.616, + "epoch": 0.52, + "grad_norm": 1.54986950392895, + "learning_rate": 4.914391244295405e-06, + "loss": 0.5334, "step": 7332 }, { - "epoch": 0.77, - "grad_norm": 2.3644581867426897, - "learning_rate": 1.3064415987478691e-06, - "loss": 0.6208, + "epoch": 0.52, + "grad_norm": 1.475992079844505, + "learning_rate": 4.913242248053886e-06, + "loss": 0.5487, "step": 7333 }, { - "epoch": 0.77, - "grad_norm": 0.9694715957645057, - "learning_rate": 1.3052931200620926e-06, - "loss": 0.5448, + "epoch": 0.52, + "grad_norm": 1.8019433301398566, + "learning_rate": 4.912093256395205e-06, + "loss": 0.5334, "step": 7334 }, { - "epoch": 0.77, - "grad_norm": 2.989251780651652, - "learning_rate": 1.3041450706166831e-06, - "loss": 0.5686, + "epoch": 0.52, + "grad_norm": 0.757988423785613, + "learning_rate": 4.910944269380054e-06, + "loss": 0.4296, "step": 7335 }, { - "epoch": 0.77, - "grad_norm": 2.568697403594937, - "learning_rate": 1.3029974505450137e-06, - "loss": 0.6258, + "epoch": 0.52, + "grad_norm": 1.5404619600443263, + "learning_rate": 4.909795287069131e-06, + "loss": 0.5086, "step": 7336 }, { - "epoch": 0.77, - "grad_norm": 2.4006514658503697, - "learning_rate": 1.301850259980414e-06, - "loss": 0.587, + "epoch": 0.52, + "grad_norm": 1.7294740967188893, + "learning_rate": 4.908646309523126e-06, + "loss": 0.5123, "step": 7337 }, { - "epoch": 0.77, - "grad_norm": 2.4391008775614704, - "learning_rate": 1.3007034990561619e-06, - "loss": 0.6487, + "epoch": 0.52, + "grad_norm": 1.6191698637764513, + "learning_rate": 4.907497336802732e-06, + "loss": 0.5597, "step": 7338 }, { - "epoch": 0.77, - "grad_norm": 2.27176792056776, - "learning_rate": 1.2995571679054835e-06, - "loss": 0.6879, + "epoch": 0.52, + "grad_norm": 1.5788484188302085, + "learning_rate": 4.9063483689686415e-06, + "loss": 0.5193, "step": 7339 }, { - "epoch": 0.77, - "grad_norm": 2.332459416723728, - "learning_rate": 1.2984112666615555e-06, - "loss": 0.5647, + "epoch": 0.52, + "grad_norm": 2.5271686612925537, + "learning_rate": 4.905199406081547e-06, + "loss": 0.533, "step": 7340 }, { - "epoch": 0.77, - "grad_norm": 1.0350097122071702, - "learning_rate": 1.2972657954575064e-06, - "loss": 0.541, + "epoch": 0.52, + "grad_norm": 2.073671004993185, + "learning_rate": 4.904050448202144e-06, + "loss": 0.5145, "step": 7341 }, { - "epoch": 0.77, - "grad_norm": 2.3128014660034273, - "learning_rate": 1.2961207544264149e-06, - "loss": 0.6494, + "epoch": 0.52, + "grad_norm": 1.8285991045877787, + "learning_rate": 4.902901495391119e-06, + "loss": 0.5013, "step": 7342 }, { - "epoch": 0.77, - "grad_norm": 2.281808135510517, - "learning_rate": 1.2949761437013059e-06, - "loss": 0.6048, + "epoch": 0.52, + "grad_norm": 1.5901907612462758, + "learning_rate": 4.901752547709166e-06, + "loss": 0.5395, "step": 7343 }, { - "epoch": 0.77, - "grad_norm": 2.495322499540691, - "learning_rate": 1.2938319634151597e-06, - "loss": 0.5942, + "epoch": 0.52, + "grad_norm": 1.7126069626901925, + "learning_rate": 4.900603605216977e-06, + "loss": 0.4922, "step": 7344 }, { - "epoch": 0.77, - "grad_norm": 3.914464189765621, - "learning_rate": 1.2926882137009012e-06, - "loss": 0.66, + "epoch": 0.52, + "grad_norm": 1.507891969992394, + "learning_rate": 4.899454667975242e-06, + "loss": 0.5554, "step": 7345 }, { - "epoch": 0.77, - "grad_norm": 3.098291543647439, - "learning_rate": 1.2915448946914106e-06, - "loss": 0.5978, + "epoch": 0.52, + "grad_norm": 2.40863267000915, + "learning_rate": 4.898305736044652e-06, + "loss": 0.5683, "step": 7346 }, { - "epoch": 0.77, - "grad_norm": 2.435837683057105, - "learning_rate": 1.2904020065195127e-06, - "loss": 0.5946, + "epoch": 0.52, + "grad_norm": 1.9209637769275547, + "learning_rate": 4.897156809485898e-06, + "loss": 0.5584, "step": 7347 }, { - "epoch": 0.77, - "grad_norm": 2.4918968126223633, - "learning_rate": 1.2892595493179876e-06, - "loss": 0.6562, + "epoch": 0.52, + "grad_norm": 1.7279904297580817, + "learning_rate": 4.8960078883596706e-06, + "loss": 0.5495, "step": 7348 }, { - "epoch": 0.77, - "grad_norm": 2.2303147723923877, - "learning_rate": 1.2881175232195604e-06, - "loss": 0.6037, + "epoch": 0.52, + "grad_norm": 1.7577852968405348, + "learning_rate": 4.894858972726659e-06, + "loss": 0.5297, "step": 7349 }, { - "epoch": 0.77, - "grad_norm": 2.4085569825882223, - "learning_rate": 1.2869759283569088e-06, - "loss": 0.5633, + "epoch": 0.52, + "grad_norm": 0.7589329591726663, + "learning_rate": 4.8937100626475505e-06, + "loss": 0.4363, "step": 7350 }, { - "epoch": 0.77, - "grad_norm": 2.9401916376736774, - "learning_rate": 1.2858347648626623e-06, - "loss": 0.6793, + "epoch": 0.52, + "grad_norm": 2.0177208520344454, + "learning_rate": 4.892561158183038e-06, + "loss": 0.4878, "step": 7351 }, { - "epoch": 0.77, - "grad_norm": 2.620281409590404, - "learning_rate": 1.2846940328693952e-06, - "loss": 0.5221, + "epoch": 0.52, + "grad_norm": 2.591169402828059, + "learning_rate": 4.891412259393811e-06, + "loss": 0.5092, "step": 7352 }, { - "epoch": 0.77, - "grad_norm": 2.460645653228923, - "learning_rate": 1.2835537325096364e-06, - "loss": 0.6427, + "epoch": 0.52, + "grad_norm": 2.126151293028521, + "learning_rate": 4.890263366340554e-06, + "loss": 0.5393, "step": 7353 }, { - "epoch": 0.77, - "grad_norm": 2.1645773174699605, - "learning_rate": 1.2824138639158607e-06, - "loss": 0.5951, + "epoch": 0.52, + "grad_norm": 2.014463959281932, + "learning_rate": 4.889114479083958e-06, + "loss": 0.5862, "step": 7354 }, { - "epoch": 0.77, - "grad_norm": 1.940681938546237, - "learning_rate": 1.2812744272204969e-06, - "loss": 0.6173, + "epoch": 0.52, + "grad_norm": 1.6088368368993453, + "learning_rate": 4.887965597684711e-06, + "loss": 0.5609, "step": 7355 }, { - "epoch": 0.77, - "grad_norm": 1.9852641803573394, - "learning_rate": 1.2801354225559194e-06, - "loss": 0.634, + "epoch": 0.52, + "grad_norm": 1.468983047816732, + "learning_rate": 4.886816722203502e-06, + "loss": 0.5169, "step": 7356 }, { - "epoch": 0.77, - "grad_norm": 2.408422665342694, - "learning_rate": 1.2789968500544563e-06, - "loss": 0.5832, + "epoch": 0.52, + "grad_norm": 4.683650072407551, + "learning_rate": 4.885667852701017e-06, + "loss": 0.531, "step": 7357 }, { - "epoch": 0.77, - "grad_norm": 2.54563268747217, - "learning_rate": 1.277858709848382e-06, - "loss": 0.5927, + "epoch": 0.52, + "grad_norm": 1.7333239597381842, + "learning_rate": 4.8845189892379415e-06, + "loss": 0.5231, "step": 7358 }, { - "epoch": 0.77, - "grad_norm": 3.892292877622211, - "learning_rate": 1.2767210020699234e-06, - "loss": 0.6332, + "epoch": 0.52, + "grad_norm": 0.8062943636873092, + "learning_rate": 4.883370131874966e-06, + "loss": 0.4582, "step": 7359 }, { - "epoch": 0.77, - "grad_norm": 0.9690186601279351, - "learning_rate": 1.2755837268512566e-06, - "loss": 0.5541, + "epoch": 0.52, + "grad_norm": 1.7797894737046922, + "learning_rate": 4.882221280672775e-06, + "loss": 0.5604, "step": 7360 }, { - "epoch": 0.77, - "grad_norm": 2.3973108957180673, - "learning_rate": 1.2744468843245066e-06, - "loss": 0.6973, + "epoch": 0.52, + "grad_norm": 2.063033854882861, + "learning_rate": 4.881072435692055e-06, + "loss": 0.4771, "step": 7361 }, { - "epoch": 0.77, - "grad_norm": 9.445703072300839, - "learning_rate": 1.2733104746217468e-06, - "loss": 0.5102, + "epoch": 0.52, + "grad_norm": 2.2966781479327145, + "learning_rate": 4.87992359699349e-06, + "loss": 0.4907, "step": 7362 }, { - "epoch": 0.77, - "grad_norm": 2.4460880394351956, - "learning_rate": 1.2721744978750028e-06, - "loss": 0.6275, + "epoch": 0.52, + "grad_norm": 2.0132215808843585, + "learning_rate": 4.878774764637771e-06, + "loss": 0.5399, "step": 7363 }, { - "epoch": 0.77, - "grad_norm": 2.5731172106909193, - "learning_rate": 1.271038954216251e-06, - "loss": 0.567, + "epoch": 0.52, + "grad_norm": 1.703541505438867, + "learning_rate": 4.877625938685579e-06, + "loss": 0.5569, "step": 7364 }, { - "epoch": 0.78, - "grad_norm": 3.312904188551475, - "learning_rate": 1.269903843777413e-06, - "loss": 0.6004, + "epoch": 0.52, + "grad_norm": 1.6747568189926068, + "learning_rate": 4.876477119197597e-06, + "loss": 0.5393, "step": 7365 }, { - "epoch": 0.78, - "grad_norm": 2.3261426877154916, - "learning_rate": 1.2687691666903657e-06, - "loss": 0.5648, + "epoch": 0.52, + "grad_norm": 1.8621462905954436, + "learning_rate": 4.875328306234514e-06, + "loss": 0.5603, "step": 7366 }, { - "epoch": 0.78, - "grad_norm": 4.634024092091831, - "learning_rate": 1.2676349230869283e-06, - "loss": 0.6585, + "epoch": 0.52, + "grad_norm": 1.65100075481453, + "learning_rate": 4.874179499857014e-06, + "loss": 0.486, "step": 7367 }, { - "epoch": 0.78, - "grad_norm": 2.5573244615411075, - "learning_rate": 1.2665011130988786e-06, - "loss": 0.5732, + "epoch": 0.52, + "grad_norm": 1.483617828046408, + "learning_rate": 4.873030700125776e-06, + "loss": 0.5323, "step": 7368 }, { - "epoch": 0.78, - "grad_norm": 2.2354267402034234, - "learning_rate": 1.2653677368579354e-06, - "loss": 0.5843, + "epoch": 0.52, + "grad_norm": 1.5947419698944014, + "learning_rate": 4.871881907101487e-06, + "loss": 0.5383, "step": 7369 }, { - "epoch": 0.78, - "grad_norm": 2.2289656447356867, - "learning_rate": 1.2642347944957744e-06, - "loss": 0.7049, + "epoch": 0.52, + "grad_norm": 1.7882505084752232, + "learning_rate": 4.870733120844831e-06, + "loss": 0.4783, "step": 7370 }, { - "epoch": 0.78, - "grad_norm": 3.1454417786703353, - "learning_rate": 1.2631022861440145e-06, - "loss": 0.5499, + "epoch": 0.52, + "grad_norm": 2.034000441614421, + "learning_rate": 4.86958434141649e-06, + "loss": 0.5397, "step": 7371 }, { - "epoch": 0.78, - "grad_norm": 3.367273641722864, - "learning_rate": 1.2619702119342286e-06, - "loss": 0.5656, + "epoch": 0.52, + "grad_norm": 1.6923893103618055, + "learning_rate": 4.868435568877146e-06, + "loss": 0.5924, "step": 7372 }, { - "epoch": 0.78, - "grad_norm": 2.156216839264307, - "learning_rate": 1.2608385719979394e-06, - "loss": 0.6229, + "epoch": 0.52, + "grad_norm": 1.8831985494351888, + "learning_rate": 4.867286803287478e-06, + "loss": 0.5419, "step": 7373 }, { - "epoch": 0.78, - "grad_norm": 3.707241733582545, - "learning_rate": 1.2597073664666159e-06, - "loss": 0.5974, + "epoch": 0.52, + "grad_norm": 1.9059238241951892, + "learning_rate": 4.866138044708175e-06, + "loss": 0.4841, "step": 7374 }, { - "epoch": 0.78, - "grad_norm": 2.589860918286414, - "learning_rate": 1.2585765954716773e-06, - "loss": 0.6479, + "epoch": 0.52, + "grad_norm": 1.5834790823932463, + "learning_rate": 4.864989293199912e-06, + "loss": 0.4753, "step": 7375 }, { - "epoch": 0.78, - "grad_norm": 2.1972502473528763, - "learning_rate": 1.257446259144494e-06, - "loss": 0.6352, + "epoch": 0.52, + "grad_norm": 1.6879136176173732, + "learning_rate": 4.863840548823373e-06, + "loss": 0.5229, "step": 7376 }, { - "epoch": 0.78, - "grad_norm": 2.8739108933647244, - "learning_rate": 1.2563163576163879e-06, - "loss": 0.6169, + "epoch": 0.52, + "grad_norm": 1.6147500189709878, + "learning_rate": 4.862691811639235e-06, + "loss": 0.5571, "step": 7377 }, { - "epoch": 0.78, - "grad_norm": 2.599760260919812, - "learning_rate": 1.2551868910186238e-06, - "loss": 0.5686, + "epoch": 0.52, + "grad_norm": 1.8373899915317233, + "learning_rate": 4.861543081708183e-06, + "loss": 0.6408, "step": 7378 }, { - "epoch": 0.78, - "grad_norm": 1.0036011302793741, - "learning_rate": 1.2540578594824226e-06, - "loss": 0.5261, + "epoch": 0.52, + "grad_norm": 1.6412387802745478, + "learning_rate": 4.860394359090895e-06, + "loss": 0.5729, "step": 7379 }, { - "epoch": 0.78, - "grad_norm": 3.1369364911691604, - "learning_rate": 1.25292926313895e-06, - "loss": 0.5366, + "epoch": 0.52, + "grad_norm": 1.7730638218199797, + "learning_rate": 4.859245643848047e-06, + "loss": 0.6109, "step": 7380 }, { - "epoch": 0.78, - "grad_norm": 2.733863227114797, - "learning_rate": 1.251801102119325e-06, - "loss": 0.5542, + "epoch": 0.52, + "grad_norm": 1.7258574952874157, + "learning_rate": 4.858096936040324e-06, + "loss": 0.492, "step": 7381 }, { - "epoch": 0.78, - "grad_norm": 4.392211563561417, - "learning_rate": 1.2506733765546115e-06, - "loss": 0.5557, + "epoch": 0.52, + "grad_norm": 1.916118385761965, + "learning_rate": 4.8569482357284015e-06, + "loss": 0.5452, "step": 7382 }, { - "epoch": 0.78, - "grad_norm": 7.2289899376136315, - "learning_rate": 1.2495460865758286e-06, - "loss": 0.6201, + "epoch": 0.52, + "grad_norm": 1.6661544942603368, + "learning_rate": 4.855799542972957e-06, + "loss": 0.5665, "step": 7383 }, { - "epoch": 0.78, - "grad_norm": 2.389324115301021, - "learning_rate": 1.2484192323139382e-06, - "loss": 0.5614, + "epoch": 0.52, + "grad_norm": 1.7872691468342043, + "learning_rate": 4.854650857834668e-06, + "loss": 0.5779, "step": 7384 }, { - "epoch": 0.78, - "grad_norm": 2.4415323240042524, - "learning_rate": 1.2472928138998569e-06, - "loss": 0.5961, + "epoch": 0.52, + "grad_norm": 1.7113523873740464, + "learning_rate": 4.853502180374216e-06, + "loss": 0.5283, "step": 7385 }, { - "epoch": 0.78, - "grad_norm": 3.376325421905973, - "learning_rate": 1.2461668314644499e-06, - "loss": 0.551, + "epoch": 0.52, + "grad_norm": 1.4446689144822593, + "learning_rate": 4.852353510652274e-06, + "loss": 0.4979, "step": 7386 }, { - "epoch": 0.78, - "grad_norm": 2.6372801647944795, - "learning_rate": 1.2450412851385275e-06, - "loss": 0.6422, + "epoch": 0.52, + "grad_norm": 1.5842041800155444, + "learning_rate": 4.851204848729521e-06, + "loss": 0.5187, "step": 7387 }, { - "epoch": 0.78, - "grad_norm": 1.0428132195188833, - "learning_rate": 1.2439161750528555e-06, - "loss": 0.5497, + "epoch": 0.52, + "grad_norm": 1.7794692525222693, + "learning_rate": 4.850056194666629e-06, + "loss": 0.5082, "step": 7388 }, { - "epoch": 0.78, - "grad_norm": 4.5630278208472985, - "learning_rate": 1.2427915013381436e-06, - "loss": 0.5869, + "epoch": 0.52, + "grad_norm": 1.4942439989136183, + "learning_rate": 4.84890754852428e-06, + "loss": 0.51, "step": 7389 }, { - "epoch": 0.78, - "grad_norm": 2.0559100223708637, - "learning_rate": 1.2416672641250548e-06, - "loss": 0.5977, + "epoch": 0.52, + "grad_norm": 1.761109989153833, + "learning_rate": 4.847758910363144e-06, + "loss": 0.5862, "step": 7390 }, { - "epoch": 0.78, - "grad_norm": 2.4409035745001746, - "learning_rate": 1.2405434635441982e-06, - "loss": 0.559, + "epoch": 0.52, + "grad_norm": 6.173619657625336, + "learning_rate": 4.8466102802439e-06, + "loss": 0.512, "step": 7391 }, { - "epoch": 0.78, - "grad_norm": 4.831933762565664, - "learning_rate": 1.2394200997261358e-06, - "loss": 0.6682, + "epoch": 0.52, + "grad_norm": 1.5809107699010336, + "learning_rate": 4.845461658227218e-06, + "loss": 0.4672, "step": 7392 }, { - "epoch": 0.78, - "grad_norm": 0.8825046392899807, - "learning_rate": 1.2382971728013742e-06, - "loss": 0.5093, + "epoch": 0.52, + "grad_norm": 1.555205269954731, + "learning_rate": 4.844313044373778e-06, + "loss": 0.5021, "step": 7393 }, { - "epoch": 0.78, - "grad_norm": 2.5797992352654386, - "learning_rate": 1.2371746829003745e-06, - "loss": 0.6392, + "epoch": 0.52, + "grad_norm": 1.4719651548176411, + "learning_rate": 4.84316443874425e-06, + "loss": 0.4514, "step": 7394 }, { - "epoch": 0.78, - "grad_norm": 2.9414008384348906, - "learning_rate": 1.2360526301535408e-06, - "loss": 0.5538, + "epoch": 0.52, + "grad_norm": 1.7005320090957707, + "learning_rate": 4.8420158413993055e-06, + "loss": 0.5527, "step": 7395 }, { - "epoch": 0.78, - "grad_norm": 2.37560198530972, - "learning_rate": 1.234931014691234e-06, - "loss": 0.6542, + "epoch": 0.52, + "grad_norm": 0.7559765146035974, + "learning_rate": 4.8408672523996216e-06, + "loss": 0.4489, "step": 7396 }, { - "epoch": 0.78, - "grad_norm": 2.760017872274412, - "learning_rate": 1.2338098366437574e-06, - "loss": 0.6122, + "epoch": 0.52, + "grad_norm": 1.5339518301199495, + "learning_rate": 4.839718671805868e-06, + "loss": 0.4766, "step": 7397 }, { - "epoch": 0.78, - "grad_norm": 2.8422640961336607, - "learning_rate": 1.2326890961413663e-06, - "loss": 0.5873, + "epoch": 0.52, + "grad_norm": 1.5129962411528446, + "learning_rate": 4.838570099678718e-06, + "loss": 0.4901, "step": 7398 }, { - "epoch": 0.78, - "grad_norm": 3.1453980578084426, - "learning_rate": 1.2315687933142672e-06, - "loss": 0.6239, + "epoch": 0.53, + "grad_norm": 1.902755591832796, + "learning_rate": 4.837421536078841e-06, + "loss": 0.6113, "step": 7399 }, { - "epoch": 0.78, - "grad_norm": 2.749851183004622, - "learning_rate": 1.2304489282926109e-06, - "loss": 0.5232, + "epoch": 0.53, + "grad_norm": 1.9715098572942094, + "learning_rate": 4.83627298106691e-06, + "loss": 0.5479, "step": 7400 }, { - "epoch": 0.78, - "grad_norm": 2.472329629666702, - "learning_rate": 1.2293295012065032e-06, - "loss": 0.6043, + "epoch": 0.53, + "grad_norm": 1.5855626553528672, + "learning_rate": 4.835124434703596e-06, + "loss": 0.6019, "step": 7401 }, { - "epoch": 0.78, - "grad_norm": 2.7136693979225757, - "learning_rate": 1.228210512185992e-06, - "loss": 0.572, + "epoch": 0.53, + "grad_norm": 1.5386253832460466, + "learning_rate": 4.833975897049568e-06, + "loss": 0.5372, "step": 7402 }, { - "epoch": 0.78, - "grad_norm": 2.3539836790068907, - "learning_rate": 1.2270919613610828e-06, - "loss": 0.5805, + "epoch": 0.53, + "grad_norm": 1.6774804459341957, + "learning_rate": 4.832827368165493e-06, + "loss": 0.5077, "step": 7403 }, { - "epoch": 0.78, - "grad_norm": 2.4768244757191487, - "learning_rate": 1.2259738488617211e-06, - "loss": 0.6012, + "epoch": 0.53, + "grad_norm": 1.7557684726530616, + "learning_rate": 4.8316788481120466e-06, + "loss": 0.5546, "step": 7404 }, { - "epoch": 0.78, - "grad_norm": 2.6680180975475234, - "learning_rate": 1.2248561748178094e-06, - "loss": 0.647, + "epoch": 0.53, + "grad_norm": 1.9292104967606223, + "learning_rate": 4.830530336949891e-06, + "loss": 0.5484, "step": 7405 }, { - "epoch": 0.78, - "grad_norm": 2.2801324593756784, - "learning_rate": 1.2237389393591931e-06, - "loss": 0.688, + "epoch": 0.53, + "grad_norm": 1.7686655525169592, + "learning_rate": 4.829381834739699e-06, + "loss": 0.5792, "step": 7406 }, { - "epoch": 0.78, - "grad_norm": 3.0717420409196436, - "learning_rate": 1.222622142615671e-06, - "loss": 0.6413, + "epoch": 0.53, + "grad_norm": 1.732875795650788, + "learning_rate": 4.8282333415421345e-06, + "loss": 0.5275, "step": 7407 }, { - "epoch": 0.78, - "grad_norm": 2.3216939444158515, - "learning_rate": 1.2215057847169904e-06, - "loss": 0.5048, + "epoch": 0.53, + "grad_norm": 1.6610760187817324, + "learning_rate": 4.827084857417869e-06, + "loss": 0.4856, "step": 7408 }, { - "epoch": 0.78, - "grad_norm": 2.7933790970927896, - "learning_rate": 1.2203898657928453e-06, - "loss": 0.6127, + "epoch": 0.53, + "grad_norm": 0.678595457152506, + "learning_rate": 4.825936382427567e-06, + "loss": 0.4306, "step": 7409 }, { - "epoch": 0.78, - "grad_norm": 2.6973298552671556, - "learning_rate": 1.2192743859728784e-06, - "loss": 0.5957, + "epoch": 0.53, + "grad_norm": 1.9136617321001381, + "learning_rate": 4.824787916631895e-06, + "loss": 0.5392, "step": 7410 }, { - "epoch": 0.78, - "grad_norm": 3.010078673364939, - "learning_rate": 1.2181593453866841e-06, - "loss": 0.5948, + "epoch": 0.53, + "grad_norm": 2.3945844545734047, + "learning_rate": 4.823639460091517e-06, + "loss": 0.5581, "step": 7411 }, { - "epoch": 0.78, - "grad_norm": 1.0077226242761792, - "learning_rate": 1.2170447441638067e-06, - "loss": 0.5568, + "epoch": 0.53, + "grad_norm": 1.7590313162382252, + "learning_rate": 4.822491012867102e-06, + "loss": 0.607, "step": 7412 }, { - "epoch": 0.78, - "grad_norm": 2.3706862467692345, - "learning_rate": 1.2159305824337337e-06, - "loss": 0.5863, + "epoch": 0.53, + "grad_norm": 1.7297025618267894, + "learning_rate": 4.821342575019313e-06, + "loss": 0.4909, "step": 7413 }, { - "epoch": 0.78, - "grad_norm": 3.0863837485718935, - "learning_rate": 1.2148168603259086e-06, - "loss": 0.6177, + "epoch": 0.53, + "grad_norm": 1.4372980742143602, + "learning_rate": 4.820194146608813e-06, + "loss": 0.5717, "step": 7414 }, { - "epoch": 0.78, - "grad_norm": 3.246542506000711, - "learning_rate": 1.213703577969717e-06, - "loss": 0.5643, + "epoch": 0.53, + "grad_norm": 1.7093201199429693, + "learning_rate": 4.81904572769627e-06, + "loss": 0.5318, "step": 7415 }, { - "epoch": 0.78, - "grad_norm": 2.2507630179043505, - "learning_rate": 1.2125907354945004e-06, - "loss": 0.5588, + "epoch": 0.53, + "grad_norm": 1.475697102776539, + "learning_rate": 4.817897318342344e-06, + "loss": 0.5002, "step": 7416 }, { - "epoch": 0.78, - "grad_norm": 3.077759944294835, - "learning_rate": 1.2114783330295426e-06, - "loss": 0.6429, + "epoch": 0.53, + "grad_norm": 1.855981730009265, + "learning_rate": 4.8167489186077e-06, + "loss": 0.5901, "step": 7417 }, { - "epoch": 0.78, - "grad_norm": 2.1001739169623956, - "learning_rate": 1.210366370704082e-06, - "loss": 0.6044, + "epoch": 0.53, + "grad_norm": 1.56069675030441, + "learning_rate": 4.815600528552996e-06, + "loss": 0.5132, "step": 7418 }, { - "epoch": 0.78, - "grad_norm": 2.0635599866105245, - "learning_rate": 1.2092548486473e-06, - "loss": 0.5829, + "epoch": 0.53, + "grad_norm": 1.8140602140772257, + "learning_rate": 4.814452148238899e-06, + "loss": 0.5395, "step": 7419 }, { - "epoch": 0.78, - "grad_norm": 2.7552402862941308, - "learning_rate": 1.2081437669883323e-06, - "loss": 0.611, + "epoch": 0.53, + "grad_norm": 1.671530695698421, + "learning_rate": 4.813303777726067e-06, + "loss": 0.5604, "step": 7420 }, { - "epoch": 0.78, - "grad_norm": 2.6069981960534463, - "learning_rate": 1.2070331258562612e-06, - "loss": 0.6104, + "epoch": 0.53, + "grad_norm": 1.767389393392174, + "learning_rate": 4.812155417075164e-06, + "loss": 0.5639, "step": 7421 }, { - "epoch": 0.78, - "grad_norm": 2.173648068395166, - "learning_rate": 1.2059229253801164e-06, - "loss": 0.6458, + "epoch": 0.53, + "grad_norm": 2.081037361349603, + "learning_rate": 4.811007066346846e-06, + "loss": 0.5818, "step": 7422 }, { - "epoch": 0.78, - "grad_norm": 2.606347301566492, - "learning_rate": 1.2048131656888801e-06, - "loss": 0.6129, + "epoch": 0.53, + "grad_norm": 1.6410819600787845, + "learning_rate": 4.809858725601777e-06, + "loss": 0.4758, "step": 7423 }, { - "epoch": 0.78, - "grad_norm": 4.002157820915137, - "learning_rate": 1.2037038469114775e-06, - "loss": 0.6574, + "epoch": 0.53, + "grad_norm": 1.7808656731878307, + "learning_rate": 4.808710394900613e-06, + "loss": 0.4939, "step": 7424 }, { - "epoch": 0.78, - "grad_norm": 2.1433237595069006, - "learning_rate": 1.2025949691767895e-06, - "loss": 0.6796, + "epoch": 0.53, + "grad_norm": 1.9010587241272825, + "learning_rate": 4.807562074304015e-06, + "loss": 0.5374, "step": 7425 }, { - "epoch": 0.78, - "grad_norm": 2.6482892199703247, - "learning_rate": 1.2014865326136393e-06, - "loss": 0.6542, + "epoch": 0.53, + "grad_norm": 1.8307344161550274, + "learning_rate": 4.80641376387264e-06, + "loss": 0.5994, "step": 7426 }, { - "epoch": 0.78, - "grad_norm": 2.8554690646087244, - "learning_rate": 1.2003785373508054e-06, - "loss": 0.5909, + "epoch": 0.53, + "grad_norm": 1.5752414441390228, + "learning_rate": 4.805265463667146e-06, + "loss": 0.5539, "step": 7427 }, { - "epoch": 0.78, - "grad_norm": 2.1151193571586755, - "learning_rate": 1.1992709835170075e-06, - "loss": 0.6143, + "epoch": 0.53, + "grad_norm": 1.8435502554702665, + "learning_rate": 4.804117173748191e-06, + "loss": 0.512, "step": 7428 }, { - "epoch": 0.78, - "grad_norm": 2.3281163582456483, - "learning_rate": 1.198163871240921e-06, - "loss": 0.4967, + "epoch": 0.53, + "grad_norm": 1.836044495113223, + "learning_rate": 4.802968894176428e-06, + "loss": 0.5246, "step": 7429 }, { - "epoch": 0.78, - "grad_norm": 2.3181484526166045, - "learning_rate": 1.197057200651165e-06, - "loss": 0.6032, + "epoch": 0.53, + "grad_norm": 5.808261776435894, + "learning_rate": 4.8018206250125175e-06, + "loss": 0.5485, "step": 7430 }, { - "epoch": 0.78, - "grad_norm": 4.4330393599119065, - "learning_rate": 1.195950971876312e-06, - "loss": 0.5914, + "epoch": 0.53, + "grad_norm": 1.5316092311971667, + "learning_rate": 4.800672366317114e-06, + "loss": 0.4924, "step": 7431 }, { - "epoch": 0.78, - "grad_norm": 2.740984634407769, - "learning_rate": 1.1948451850448767e-06, - "loss": 0.7403, + "epoch": 0.53, + "grad_norm": 3.935278511444463, + "learning_rate": 4.799524118150871e-06, + "loss": 0.5741, "step": 7432 }, { - "epoch": 0.78, - "grad_norm": 2.3670282194811905, - "learning_rate": 1.1937398402853283e-06, - "loss": 0.5251, + "epoch": 0.53, + "grad_norm": 1.5135101065081291, + "learning_rate": 4.798375880574442e-06, + "loss": 0.4955, "step": 7433 }, { - "epoch": 0.78, - "grad_norm": 1.0276430189430175, - "learning_rate": 1.1926349377260843e-06, - "loss": 0.538, + "epoch": 0.53, + "grad_norm": 1.9201351532437316, + "learning_rate": 4.797227653648483e-06, + "loss": 0.558, "step": 7434 }, { - "epoch": 0.78, - "grad_norm": 2.4380874996957984, - "learning_rate": 1.1915304774955054e-06, - "loss": 0.6245, + "epoch": 0.53, + "grad_norm": 1.5240903669096573, + "learning_rate": 4.7960794374336465e-06, + "loss": 0.5736, "step": 7435 }, { - "epoch": 0.78, - "grad_norm": 3.242941226659693, - "learning_rate": 1.1904264597219078e-06, - "loss": 0.5773, + "epoch": 0.53, + "grad_norm": 1.5963124220227844, + "learning_rate": 4.794931231990586e-06, + "loss": 0.5217, "step": 7436 }, { - "epoch": 0.78, - "grad_norm": 2.993175401734412, - "learning_rate": 1.189322884533551e-06, - "loss": 0.6, + "epoch": 0.53, + "grad_norm": 1.7073144660270758, + "learning_rate": 4.79378303737995e-06, + "loss": 0.5832, "step": 7437 }, { - "epoch": 0.78, - "grad_norm": 0.9803046786575148, - "learning_rate": 1.1882197520586464e-06, - "loss": 0.5364, + "epoch": 0.53, + "grad_norm": 1.7683480343901385, + "learning_rate": 4.792634853662396e-06, + "loss": 0.4593, "step": 7438 }, { - "epoch": 0.78, - "grad_norm": 3.157862409696275, - "learning_rate": 1.1871170624253515e-06, - "loss": 0.5523, + "epoch": 0.53, + "grad_norm": 1.500929338342082, + "learning_rate": 4.79148668089857e-06, + "loss": 0.5354, "step": 7439 }, { - "epoch": 0.78, - "grad_norm": 2.5580589814696273, - "learning_rate": 1.1860148157617757e-06, - "loss": 0.6233, + "epoch": 0.53, + "grad_norm": 1.8788801129713897, + "learning_rate": 4.7903385191491246e-06, + "loss": 0.5449, "step": 7440 }, { - "epoch": 0.78, - "grad_norm": 2.4718486982211854, - "learning_rate": 1.1849130121959717e-06, - "loss": 0.5901, + "epoch": 0.53, + "grad_norm": 1.7817568346780526, + "learning_rate": 4.789190368474708e-06, + "loss": 0.537, "step": 7441 }, { - "epoch": 0.78, - "grad_norm": 2.4594217860273098, - "learning_rate": 1.1838116518559474e-06, - "loss": 0.6861, + "epoch": 0.53, + "grad_norm": 2.0801183752217605, + "learning_rate": 4.7880422289359714e-06, + "loss": 0.5422, "step": 7442 }, { - "epoch": 0.78, - "grad_norm": 0.9338351636324551, - "learning_rate": 1.1827107348696526e-06, - "loss": 0.5445, + "epoch": 0.53, + "grad_norm": 2.2179929388035533, + "learning_rate": 4.786894100593563e-06, + "loss": 0.5841, "step": 7443 }, { - "epoch": 0.78, - "grad_norm": 2.3059234955489814, - "learning_rate": 1.181610261364991e-06, - "loss": 0.5499, + "epoch": 0.53, + "grad_norm": 1.7070374580430774, + "learning_rate": 4.785745983508128e-06, + "loss": 0.5329, "step": 7444 }, { - "epoch": 0.78, - "grad_norm": 2.581067104733737, - "learning_rate": 1.1805102314698103e-06, - "loss": 0.6374, + "epoch": 0.53, + "grad_norm": 1.6441414531436904, + "learning_rate": 4.7845978777403175e-06, + "loss": 0.5323, "step": 7445 }, { - "epoch": 0.78, - "grad_norm": 2.9256095254675536, - "learning_rate": 1.1794106453119098e-06, - "loss": 0.5952, + "epoch": 0.53, + "grad_norm": 1.731431387725152, + "learning_rate": 4.783449783350779e-06, + "loss": 0.5236, "step": 7446 }, { - "epoch": 0.78, - "grad_norm": 2.5727809288583776, - "learning_rate": 1.1783115030190378e-06, - "loss": 0.6924, + "epoch": 0.53, + "grad_norm": 2.5239439755761306, + "learning_rate": 4.782301700400155e-06, + "loss": 0.4864, "step": 7447 }, { - "epoch": 0.78, - "grad_norm": 4.922966924396616, - "learning_rate": 1.1772128047188864e-06, - "loss": 0.6865, + "epoch": 0.53, + "grad_norm": 2.851349324133474, + "learning_rate": 4.781153628949092e-06, + "loss": 0.5619, "step": 7448 }, { - "epoch": 0.78, - "grad_norm": 2.39429256222939, - "learning_rate": 1.1761145505391025e-06, - "loss": 0.6303, + "epoch": 0.53, + "grad_norm": 1.8905461710401283, + "learning_rate": 4.780005569058236e-06, + "loss": 0.5012, "step": 7449 }, { - "epoch": 0.78, - "grad_norm": 2.7885079254569667, - "learning_rate": 1.1750167406072743e-06, - "loss": 0.5501, + "epoch": 0.53, + "grad_norm": 1.747240491128981, + "learning_rate": 4.778857520788233e-06, + "loss": 0.4931, "step": 7450 }, { - "epoch": 0.78, - "grad_norm": 2.4497210304886603, - "learning_rate": 1.1739193750509465e-06, - "loss": 0.5928, + "epoch": 0.53, + "grad_norm": 1.6218377585856663, + "learning_rate": 4.777709484199724e-06, + "loss": 0.5544, "step": 7451 }, { - "epoch": 0.78, - "grad_norm": 2.4756214907759198, - "learning_rate": 1.1728224539976035e-06, - "loss": 0.5829, + "epoch": 0.53, + "grad_norm": 1.528074819777602, + "learning_rate": 4.776561459353352e-06, + "loss": 0.5253, "step": 7452 }, { - "epoch": 0.78, - "grad_norm": 2.503009767146594, - "learning_rate": 1.1717259775746865e-06, - "loss": 0.5743, + "epoch": 0.53, + "grad_norm": 0.8394076133505175, + "learning_rate": 4.775413446309763e-06, + "loss": 0.4647, "step": 7453 }, { - "epoch": 0.78, - "grad_norm": 2.4617513372135003, - "learning_rate": 1.1706299459095776e-06, - "loss": 0.6251, + "epoch": 0.53, + "grad_norm": 1.8208987385730326, + "learning_rate": 4.774265445129596e-06, + "loss": 0.6609, "step": 7454 }, { - "epoch": 0.78, - "grad_norm": 2.2715662368230274, - "learning_rate": 1.1695343591296115e-06, - "loss": 0.6545, + "epoch": 0.53, + "grad_norm": 1.5537511415173335, + "learning_rate": 4.773117455873493e-06, + "loss": 0.4675, "step": 7455 }, { - "epoch": 0.78, - "grad_norm": 3.2007462619830287, - "learning_rate": 1.1684392173620729e-06, - "loss": 0.5534, + "epoch": 0.53, + "grad_norm": 1.6393589097206174, + "learning_rate": 4.7719694786020946e-06, + "loss": 0.529, "step": 7456 }, { - "epoch": 0.78, - "grad_norm": 0.9869030111837869, - "learning_rate": 1.1673445207341882e-06, - "loss": 0.534, + "epoch": 0.53, + "grad_norm": 1.7787301197939274, + "learning_rate": 4.770821513376041e-06, + "loss": 0.5398, "step": 7457 }, { - "epoch": 0.78, - "grad_norm": 0.9843494999894192, - "learning_rate": 1.1662502693731393e-06, - "loss": 0.5375, + "epoch": 0.53, + "grad_norm": 2.0877136113180232, + "learning_rate": 4.769673560255972e-06, + "loss": 0.5801, "step": 7458 }, { - "epoch": 0.78, - "grad_norm": 2.964320138816884, - "learning_rate": 1.1651564634060509e-06, - "loss": 0.5889, + "epoch": 0.53, + "grad_norm": 1.7021447527225175, + "learning_rate": 4.768525619302525e-06, + "loss": 0.5355, "step": 7459 }, { - "epoch": 0.79, - "grad_norm": 2.3924454080709854, - "learning_rate": 1.1640631029600002e-06, - "loss": 0.5073, + "epoch": 0.53, + "grad_norm": 2.015943286957345, + "learning_rate": 4.7673776905763395e-06, + "loss": 0.5009, "step": 7460 }, { - "epoch": 0.79, - "grad_norm": 2.8236225380832143, - "learning_rate": 1.1629701881620086e-06, - "loss": 0.6282, + "epoch": 0.53, + "grad_norm": 1.6458412097726336, + "learning_rate": 4.766229774138054e-06, + "loss": 0.5099, "step": 7461 }, { - "epoch": 0.79, - "grad_norm": 3.172480547369297, - "learning_rate": 1.1618777191390502e-06, - "loss": 0.5964, + "epoch": 0.53, + "grad_norm": 1.585458747173728, + "learning_rate": 4.765081870048304e-06, + "loss": 0.5225, "step": 7462 }, { - "epoch": 0.79, - "grad_norm": 2.810127828045121, - "learning_rate": 1.1607856960180413e-06, - "loss": 0.6193, + "epoch": 0.53, + "grad_norm": 1.4320428462954977, + "learning_rate": 4.763933978367722e-06, + "loss": 0.543, "step": 7463 }, { - "epoch": 0.79, - "grad_norm": 2.6027058379471204, - "learning_rate": 1.1596941189258542e-06, - "loss": 0.5957, + "epoch": 0.53, + "grad_norm": 1.9244651984257728, + "learning_rate": 4.762786099156949e-06, + "loss": 0.5454, "step": 7464 }, { - "epoch": 0.79, - "grad_norm": 2.5799914326081077, - "learning_rate": 1.1586029879893018e-06, - "loss": 0.5961, + "epoch": 0.53, + "grad_norm": 1.8985567502855376, + "learning_rate": 4.76163823247662e-06, + "loss": 0.5563, "step": 7465 }, { - "epoch": 0.79, - "grad_norm": 2.272129728569525, - "learning_rate": 1.1575123033351514e-06, - "loss": 0.574, + "epoch": 0.53, + "grad_norm": 2.410341750380147, + "learning_rate": 4.760490378387368e-06, + "loss": 0.5446, "step": 7466 }, { - "epoch": 0.79, - "grad_norm": 3.353854643372349, - "learning_rate": 1.1564220650901126e-06, - "loss": 0.548, + "epoch": 0.53, + "grad_norm": 2.175554390380109, + "learning_rate": 4.759342536949822e-06, + "loss": 0.5732, "step": 7467 }, { - "epoch": 0.79, - "grad_norm": 2.284598981438445, - "learning_rate": 1.1553322733808474e-06, - "loss": 0.6057, + "epoch": 0.53, + "grad_norm": 1.6401908700567378, + "learning_rate": 4.758194708224621e-06, + "loss": 0.5312, "step": 7468 }, { - "epoch": 0.79, - "grad_norm": 2.5638147489723737, - "learning_rate": 1.1542429283339669e-06, - "loss": 0.625, + "epoch": 0.53, + "grad_norm": 0.7128127440696862, + "learning_rate": 4.7570468922723946e-06, + "loss": 0.4326, "step": 7469 }, { - "epoch": 0.79, - "grad_norm": 2.859248521334551, - "learning_rate": 1.153154030076024e-06, - "loss": 0.5896, + "epoch": 0.53, + "grad_norm": 2.3036649742093136, + "learning_rate": 4.755899089153774e-06, + "loss": 0.5559, "step": 7470 }, { - "epoch": 0.79, - "grad_norm": 2.1507209034830352, - "learning_rate": 1.1520655787335272e-06, - "loss": 0.5909, + "epoch": 0.53, + "grad_norm": 1.7487258241775487, + "learning_rate": 4.754751298929391e-06, + "loss": 0.5336, "step": 7471 }, { - "epoch": 0.79, - "grad_norm": 2.2804744847150293, - "learning_rate": 1.150977574432927e-06, - "loss": 0.6216, + "epoch": 0.53, + "grad_norm": 1.9231597434583017, + "learning_rate": 4.753603521659874e-06, + "loss": 0.5421, "step": 7472 }, { - "epoch": 0.79, - "grad_norm": 2.2755713818694026, - "learning_rate": 1.1498900173006271e-06, - "loss": 0.5953, + "epoch": 0.53, + "grad_norm": 1.6840225014811399, + "learning_rate": 4.752455757405857e-06, + "loss": 0.5594, "step": 7473 }, { - "epoch": 0.79, - "grad_norm": 2.4375823375991676, - "learning_rate": 1.1488029074629742e-06, - "loss": 0.5324, + "epoch": 0.53, + "grad_norm": 1.6712448042547312, + "learning_rate": 4.751308006227965e-06, + "loss": 0.4901, "step": 7474 }, { - "epoch": 0.79, - "grad_norm": 2.6695360497471374, - "learning_rate": 1.1477162450462681e-06, - "loss": 0.5473, + "epoch": 0.53, + "grad_norm": 2.5204317961760636, + "learning_rate": 4.7501602681868234e-06, + "loss": 0.5434, "step": 7475 }, { - "epoch": 0.79, - "grad_norm": 3.055784414516847, - "learning_rate": 1.1466300301767513e-06, - "loss": 0.6207, + "epoch": 0.53, + "grad_norm": 1.6245395466828847, + "learning_rate": 4.749012543343066e-06, + "loss": 0.5367, "step": 7476 }, { - "epoch": 0.79, - "grad_norm": 2.3379616900640507, - "learning_rate": 1.1455442629806208e-06, - "loss": 0.57, + "epoch": 0.53, + "grad_norm": 2.1041562578934156, + "learning_rate": 4.747864831757316e-06, + "loss": 0.6058, "step": 7477 }, { - "epoch": 0.79, - "grad_norm": 2.3050676485829817, - "learning_rate": 1.1444589435840136e-06, - "loss": 0.5941, + "epoch": 0.53, + "grad_norm": 2.404924992002359, + "learning_rate": 4.746717133490199e-06, + "loss": 0.5595, "step": 7478 }, { - "epoch": 0.79, - "grad_norm": 2.2327610274880056, - "learning_rate": 1.1433740721130227e-06, - "loss": 0.6705, + "epoch": 0.53, + "grad_norm": 1.5152583021831507, + "learning_rate": 4.745569448602341e-06, + "loss": 0.5743, "step": 7479 }, { - "epoch": 0.79, - "grad_norm": 2.318030102730509, - "learning_rate": 1.1422896486936819e-06, - "loss": 0.5924, + "epoch": 0.53, + "grad_norm": 0.7220905969730478, + "learning_rate": 4.74442177715437e-06, + "loss": 0.4477, "step": 7480 }, { - "epoch": 0.79, - "grad_norm": 2.3743209801016847, - "learning_rate": 1.1412056734519788e-06, - "loss": 0.6173, + "epoch": 0.53, + "grad_norm": 1.8326847278405758, + "learning_rate": 4.7432741192069045e-06, + "loss": 0.5264, "step": 7481 }, { - "epoch": 0.79, - "grad_norm": 2.34093180724465, - "learning_rate": 1.1401221465138468e-06, - "loss": 0.6462, + "epoch": 0.53, + "grad_norm": 1.5997393604720236, + "learning_rate": 4.742126474820568e-06, + "loss": 0.4928, "step": 7482 }, { - "epoch": 0.79, - "grad_norm": 2.6932639467851116, - "learning_rate": 1.1390390680051649e-06, - "loss": 0.6229, + "epoch": 0.53, + "grad_norm": 1.3972868113779104, + "learning_rate": 4.740978844055989e-06, + "loss": 0.5565, "step": 7483 }, { - "epoch": 0.79, - "grad_norm": 2.346396013718988, - "learning_rate": 1.1379564380517648e-06, - "loss": 0.6471, + "epoch": 0.53, + "grad_norm": 2.197027331142435, + "learning_rate": 4.7398312269737824e-06, + "loss": 0.5564, "step": 7484 }, { - "epoch": 0.79, - "grad_norm": 2.5191557971368876, - "learning_rate": 1.1368742567794199e-06, - "loss": 0.5747, + "epoch": 0.53, + "grad_norm": 1.944802644093845, + "learning_rate": 4.738683623634573e-06, + "loss": 0.5541, "step": 7485 }, { - "epoch": 0.79, - "grad_norm": 2.187304261864126, - "learning_rate": 1.1357925243138585e-06, - "loss": 0.6561, + "epoch": 0.53, + "grad_norm": 1.6551526735568334, + "learning_rate": 4.737536034098979e-06, + "loss": 0.5048, "step": 7486 }, { - "epoch": 0.79, - "grad_norm": 11.808798126077034, - "learning_rate": 1.1347112407807499e-06, - "loss": 0.61, + "epoch": 0.53, + "grad_norm": 2.31167449680483, + "learning_rate": 4.7363884584276216e-06, + "loss": 0.4427, "step": 7487 }, { - "epoch": 0.79, - "grad_norm": 2.5589240903441346, - "learning_rate": 1.1336304063057169e-06, - "loss": 0.5923, + "epoch": 0.53, + "grad_norm": 1.8542841897334028, + "learning_rate": 4.73524089668112e-06, + "loss": 0.5451, "step": 7488 }, { - "epoch": 0.79, - "grad_norm": 2.2691294777604085, - "learning_rate": 1.1325500210143253e-06, - "loss": 0.6155, + "epoch": 0.53, + "grad_norm": 2.3575642419174008, + "learning_rate": 4.734093348920091e-06, + "loss": 0.5581, "step": 7489 }, { - "epoch": 0.79, - "grad_norm": 2.901817891190817, - "learning_rate": 1.1314700850320948e-06, - "loss": 0.6281, + "epoch": 0.53, + "grad_norm": 1.6910039425539165, + "learning_rate": 4.7329458152051504e-06, + "loss": 0.4889, "step": 7490 }, { - "epoch": 0.79, - "grad_norm": 2.711266040481578, - "learning_rate": 1.1303905984844848e-06, - "loss": 0.6484, + "epoch": 0.53, + "grad_norm": 2.2840380467395156, + "learning_rate": 4.73179829559692e-06, + "loss": 0.5723, "step": 7491 }, { - "epoch": 0.79, - "grad_norm": 2.2630100169064113, - "learning_rate": 1.1293115614969109e-06, - "loss": 0.5749, + "epoch": 0.53, + "grad_norm": 1.6401781185173763, + "learning_rate": 4.73065079015601e-06, + "loss": 0.4466, "step": 7492 }, { - "epoch": 0.79, - "grad_norm": 2.207926785870351, - "learning_rate": 1.1282329741947295e-06, - "loss": 0.584, + "epoch": 0.53, + "grad_norm": 1.58735663124372, + "learning_rate": 4.7295032989430375e-06, + "loss": 0.5282, "step": 7493 }, { - "epoch": 0.79, - "grad_norm": 2.3024880371784513, - "learning_rate": 1.1271548367032487e-06, - "loss": 0.6285, + "epoch": 0.53, + "grad_norm": 3.063171216389885, + "learning_rate": 4.728355822018618e-06, + "loss": 0.5496, "step": 7494 }, { - "epoch": 0.79, - "grad_norm": 2.259781922595136, - "learning_rate": 1.1260771491477252e-06, - "loss": 0.534, + "epoch": 0.53, + "grad_norm": 1.8310121786480038, + "learning_rate": 4.727208359443366e-06, + "loss": 0.4686, "step": 7495 }, { - "epoch": 0.79, - "grad_norm": 2.3229570086330815, - "learning_rate": 1.1249999116533589e-06, - "loss": 0.5503, + "epoch": 0.53, + "grad_norm": 1.7136942947762779, + "learning_rate": 4.726060911277892e-06, + "loss": 0.5378, "step": 7496 }, { - "epoch": 0.79, - "grad_norm": 2.553935192730057, - "learning_rate": 1.1239231243453025e-06, - "loss": 0.664, + "epoch": 0.53, + "grad_norm": 2.656754583939008, + "learning_rate": 4.724913477582808e-06, + "loss": 0.4888, "step": 7497 }, { - "epoch": 0.79, - "grad_norm": 0.92848847705716, - "learning_rate": 1.122846787348652e-06, - "loss": 0.5382, + "epoch": 0.53, + "grad_norm": 0.6924432510906878, + "learning_rate": 4.723766058418726e-06, + "loss": 0.4523, "step": 7498 }, { - "epoch": 0.79, - "grad_norm": 2.25087708292023, - "learning_rate": 1.1217709007884548e-06, - "loss": 0.6478, + "epoch": 0.53, + "grad_norm": 0.6532410295022965, + "learning_rate": 4.722618653846259e-06, + "loss": 0.4179, "step": 7499 }, { - "epoch": 0.79, - "grad_norm": 2.887135255747007, - "learning_rate": 1.1206954647897023e-06, - "loss": 0.6988, + "epoch": 0.53, + "grad_norm": 1.6144933320568173, + "learning_rate": 4.721471263926013e-06, + "loss": 0.5016, "step": 7500 }, { - "epoch": 0.79, - "grad_norm": 2.881071494956094, - "learning_rate": 1.1196204794773385e-06, - "loss": 0.6754, + "epoch": 0.53, + "grad_norm": 2.005213481341487, + "learning_rate": 4.720323888718598e-06, + "loss": 0.5759, "step": 7501 }, { - "epoch": 0.79, - "grad_norm": 3.8124395050447877, - "learning_rate": 1.1185459449762486e-06, - "loss": 0.6218, + "epoch": 0.53, + "grad_norm": 2.262045600899499, + "learning_rate": 4.7191765282846234e-06, + "loss": 0.5684, "step": 7502 }, { - "epoch": 0.79, - "grad_norm": 5.5635427523252625, - "learning_rate": 1.1174718614112711e-06, - "loss": 0.6018, + "epoch": 0.53, + "grad_norm": 1.5910710544108388, + "learning_rate": 4.718029182684697e-06, + "loss": 0.5753, "step": 7503 }, { - "epoch": 0.79, - "grad_norm": 2.1176840263880585, - "learning_rate": 1.1163982289071907e-06, - "loss": 0.5724, + "epoch": 0.53, + "grad_norm": 1.7124610139064993, + "learning_rate": 4.716881851979425e-06, + "loss": 0.5978, "step": 7504 }, { - "epoch": 0.79, - "grad_norm": 2.389675591616161, - "learning_rate": 1.1153250475887362e-06, - "loss": 0.658, + "epoch": 0.53, + "grad_norm": 3.1515524398778387, + "learning_rate": 4.715734536229411e-06, + "loss": 0.5422, "step": 7505 }, { - "epoch": 0.79, - "grad_norm": 3.2712660526620176, - "learning_rate": 1.1142523175805896e-06, - "loss": 0.6494, + "epoch": 0.53, + "grad_norm": 1.712425424114244, + "learning_rate": 4.714587235495263e-06, + "loss": 0.5096, "step": 7506 }, { - "epoch": 0.79, - "grad_norm": 2.3498472808911988, - "learning_rate": 1.113180039007375e-06, - "loss": 0.6334, + "epoch": 0.53, + "grad_norm": 0.7656689492936505, + "learning_rate": 4.713439949837585e-06, + "loss": 0.4267, "step": 7507 }, { - "epoch": 0.79, - "grad_norm": 4.183640363375537, - "learning_rate": 1.112108211993669e-06, - "loss": 0.6067, + "epoch": 0.53, + "grad_norm": 1.59352688301269, + "learning_rate": 4.712292679316978e-06, + "loss": 0.5595, "step": 7508 }, { - "epoch": 0.79, - "grad_norm": 4.308029778683292, - "learning_rate": 1.1110368366639906e-06, - "loss": 0.6066, + "epoch": 0.53, + "grad_norm": 1.5586540046966229, + "learning_rate": 4.711145423994047e-06, + "loss": 0.4992, "step": 7509 }, { - "epoch": 0.79, - "grad_norm": 8.528977099429406, - "learning_rate": 1.109965913142812e-06, - "loss": 0.6271, + "epoch": 0.53, + "grad_norm": 1.5852895726635283, + "learning_rate": 4.7099981839293956e-06, + "loss": 0.5227, "step": 7510 }, { - "epoch": 0.79, - "grad_norm": 2.7928225891739076, - "learning_rate": 1.1088954415545478e-06, - "loss": 0.5326, + "epoch": 0.53, + "grad_norm": 9.304911006753334, + "learning_rate": 4.708850959183622e-06, + "loss": 0.5499, "step": 7511 }, { - "epoch": 0.79, - "grad_norm": 2.4456570967777926, - "learning_rate": 1.107825422023564e-06, - "loss": 0.6375, + "epoch": 0.53, + "grad_norm": 2.167496624122782, + "learning_rate": 4.707703749817325e-06, + "loss": 0.5393, "step": 7512 }, { - "epoch": 0.79, - "grad_norm": 3.0183023237081814, - "learning_rate": 1.1067558546741708e-06, - "loss": 0.6086, + "epoch": 0.53, + "grad_norm": 1.5680350233906049, + "learning_rate": 4.706556555891108e-06, + "loss": 0.5709, "step": 7513 }, { - "epoch": 0.79, - "grad_norm": 2.757754038229962, - "learning_rate": 1.1056867396306293e-06, - "loss": 0.5815, + "epoch": 0.53, + "grad_norm": 2.201014033283124, + "learning_rate": 4.7054093774655694e-06, + "loss": 0.524, "step": 7514 }, { - "epoch": 0.79, - "grad_norm": 2.5086536878121533, - "learning_rate": 1.1046180770171433e-06, - "loss": 0.5206, + "epoch": 0.53, + "grad_norm": 1.5191105516273415, + "learning_rate": 4.704262214601305e-06, + "loss": 0.4511, "step": 7515 }, { - "epoch": 0.79, - "grad_norm": 1.022738679492978, - "learning_rate": 1.1035498669578693e-06, - "loss": 0.5553, + "epoch": 0.53, + "grad_norm": 1.6372813688524974, + "learning_rate": 4.7031150673589095e-06, + "loss": 0.5218, "step": 7516 }, { - "epoch": 0.79, - "grad_norm": 2.9716402092398586, - "learning_rate": 1.1024821095769089e-06, - "loss": 0.5598, + "epoch": 0.53, + "grad_norm": 1.7736092171866171, + "learning_rate": 4.701967935798985e-06, + "loss": 0.5531, "step": 7517 }, { - "epoch": 0.79, - "grad_norm": 2.7359639039165096, - "learning_rate": 1.1014148049983097e-06, - "loss": 0.6813, + "epoch": 0.53, + "grad_norm": 1.543645108684181, + "learning_rate": 4.700820819982125e-06, + "loss": 0.5111, "step": 7518 }, { - "epoch": 0.79, - "grad_norm": 2.3089260079854244, - "learning_rate": 1.1003479533460698e-06, - "loss": 0.5112, + "epoch": 0.53, + "grad_norm": 2.4953844619178405, + "learning_rate": 4.699673719968922e-06, + "loss": 0.5124, "step": 7519 }, { - "epoch": 0.79, - "grad_norm": 3.623336530654308, - "learning_rate": 1.099281554744131e-06, - "loss": 0.6542, + "epoch": 0.53, + "grad_norm": 1.5927424257296117, + "learning_rate": 4.698526635819969e-06, + "loss": 0.5607, "step": 7520 }, { - "epoch": 0.79, - "grad_norm": 2.247425868920514, - "learning_rate": 1.0982156093163864e-06, - "loss": 0.571, + "epoch": 0.53, + "grad_norm": 1.9449488885871693, + "learning_rate": 4.6973795675958625e-06, + "loss": 0.5952, "step": 7521 }, { - "epoch": 0.79, - "grad_norm": 4.231031748647357, - "learning_rate": 1.0971501171866717e-06, - "loss": 0.5311, + "epoch": 0.53, + "grad_norm": 1.9984581668462433, + "learning_rate": 4.696232515357192e-06, + "loss": 0.5515, "step": 7522 }, { - "epoch": 0.79, - "grad_norm": 2.718419525290093, - "learning_rate": 1.0960850784787763e-06, - "loss": 0.5205, + "epoch": 0.53, + "grad_norm": 1.6852881570264506, + "learning_rate": 4.6950854791645485e-06, + "loss": 0.5158, "step": 7523 }, { - "epoch": 0.79, - "grad_norm": 3.0286666822872843, - "learning_rate": 1.09502049331643e-06, - "loss": 0.611, + "epoch": 0.53, + "grad_norm": 2.1324273564783023, + "learning_rate": 4.693938459078524e-06, + "loss": 0.5058, "step": 7524 }, { - "epoch": 0.79, - "grad_norm": 2.9672246988207207, - "learning_rate": 1.0939563618233156e-06, - "loss": 0.5913, + "epoch": 0.53, + "grad_norm": 1.9104510018384073, + "learning_rate": 4.692791455159708e-06, + "loss": 0.4644, "step": 7525 }, { - "epoch": 0.79, - "grad_norm": 2.6418538967198884, - "learning_rate": 1.0928926841230585e-06, - "loss": 0.59, + "epoch": 0.53, + "grad_norm": 1.9731952149685696, + "learning_rate": 4.6916444674686885e-06, + "loss": 0.5055, "step": 7526 }, { - "epoch": 0.79, - "grad_norm": 2.2332248750501242, - "learning_rate": 1.0918294603392371e-06, - "loss": 0.5947, + "epoch": 0.53, + "grad_norm": 1.9198955078713131, + "learning_rate": 4.69049749606605e-06, + "loss": 0.4897, "step": 7527 }, { - "epoch": 0.79, - "grad_norm": 2.5309030658613185, - "learning_rate": 1.0907666905953696e-06, - "loss": 0.5941, + "epoch": 0.53, + "grad_norm": 1.5513023343118293, + "learning_rate": 4.689350541012386e-06, + "loss": 0.5074, "step": 7528 }, { - "epoch": 0.79, - "grad_norm": 2.3540324184287456, - "learning_rate": 1.0897043750149277e-06, - "loss": 0.6044, + "epoch": 0.53, + "grad_norm": 1.6341022434994197, + "learning_rate": 4.688203602368277e-06, + "loss": 0.4657, "step": 7529 }, { - "epoch": 0.79, - "grad_norm": 2.770490477689187, - "learning_rate": 1.0886425137213297e-06, - "loss": 0.594, + "epoch": 0.53, + "grad_norm": 1.7720462701558881, + "learning_rate": 4.687056680194312e-06, + "loss": 0.5403, "step": 7530 }, { - "epoch": 0.79, - "grad_norm": 2.6612418290795157, - "learning_rate": 1.087581106837936e-06, - "loss": 0.5736, + "epoch": 0.53, + "grad_norm": 1.7849408711916237, + "learning_rate": 4.685909774551071e-06, + "loss": 0.5098, "step": 7531 }, { - "epoch": 0.79, - "grad_norm": 2.6742845066681014, - "learning_rate": 1.086520154488061e-06, - "loss": 0.6691, + "epoch": 0.53, + "grad_norm": 2.332887165781788, + "learning_rate": 4.6847628854991425e-06, + "loss": 0.6053, "step": 7532 }, { - "epoch": 0.79, - "grad_norm": 2.9951587604725614, - "learning_rate": 1.0854596567949605e-06, - "loss": 0.5694, + "epoch": 0.53, + "grad_norm": 1.7139707142646405, + "learning_rate": 4.683616013099106e-06, + "loss": 0.6241, "step": 7533 }, { - "epoch": 0.79, - "grad_norm": 2.5789976556897116, - "learning_rate": 1.084399613881843e-06, - "loss": 0.6256, + "epoch": 0.53, + "grad_norm": 1.665656818969037, + "learning_rate": 4.682469157411544e-06, + "loss": 0.5302, "step": 7534 }, { - "epoch": 0.79, - "grad_norm": 3.7119280648765347, - "learning_rate": 1.0833400258718579e-06, - "loss": 0.5858, + "epoch": 0.53, + "grad_norm": 1.5444400732674446, + "learning_rate": 4.681322318497035e-06, + "loss": 0.5197, "step": 7535 }, { - "epoch": 0.79, - "grad_norm": 3.597761699649087, - "learning_rate": 1.0822808928881078e-06, - "loss": 0.5949, + "epoch": 0.53, + "grad_norm": 1.7295750464442567, + "learning_rate": 4.680175496416163e-06, + "loss": 0.5574, "step": 7536 }, { - "epoch": 0.79, - "grad_norm": 2.7367475679821016, - "learning_rate": 1.0812222150536379e-06, - "loss": 0.4645, + "epoch": 0.53, + "grad_norm": 1.6138834141619336, + "learning_rate": 4.679028691229503e-06, + "loss": 0.4755, "step": 7537 }, { - "epoch": 0.79, - "grad_norm": 2.775373037805519, - "learning_rate": 1.0801639924914437e-06, - "loss": 0.6371, + "epoch": 0.53, + "grad_norm": 5.08560198395392, + "learning_rate": 4.677881902997638e-06, + "loss": 0.4584, "step": 7538 }, { - "epoch": 0.79, - "grad_norm": 2.673800251379294, - "learning_rate": 1.0791062253244644e-06, - "loss": 0.562, + "epoch": 0.53, + "grad_norm": 1.7858440384474503, + "learning_rate": 4.676735131781139e-06, + "loss": 0.5737, "step": 7539 }, { - "epoch": 0.79, - "grad_norm": 4.733905978311876, - "learning_rate": 1.07804891367559e-06, - "loss": 0.6194, + "epoch": 0.54, + "grad_norm": 1.6706114807199945, + "learning_rate": 4.675588377640588e-06, + "loss": 0.5603, "step": 7540 }, { - "epoch": 0.79, - "grad_norm": 2.189114404251597, - "learning_rate": 1.0769920576676569e-06, - "loss": 0.5761, + "epoch": 0.54, + "grad_norm": 1.9456859921286078, + "learning_rate": 4.674441640636559e-06, + "loss": 0.5443, "step": 7541 }, { - "epoch": 0.79, - "grad_norm": 2.283883510957929, - "learning_rate": 1.0759356574234447e-06, - "loss": 0.6039, + "epoch": 0.54, + "grad_norm": 2.078625484326005, + "learning_rate": 4.673294920829623e-06, + "loss": 0.5181, "step": 7542 }, { - "epoch": 0.79, - "grad_norm": 3.347107417686752, - "learning_rate": 1.0748797130656862e-06, - "loss": 0.6732, + "epoch": 0.54, + "grad_norm": 1.675332270080036, + "learning_rate": 4.672148218280358e-06, + "loss": 0.5375, "step": 7543 }, { - "epoch": 0.79, - "grad_norm": 3.3757009779681755, - "learning_rate": 1.0738242247170549e-06, - "loss": 0.5663, + "epoch": 0.54, + "grad_norm": 1.7271741595536443, + "learning_rate": 4.671001533049334e-06, + "loss": 0.5509, "step": 7544 }, { - "epoch": 0.79, - "grad_norm": 2.1237347595673874, - "learning_rate": 1.0727691925001765e-06, - "loss": 0.6165, + "epoch": 0.54, + "grad_norm": 1.67701327800366, + "learning_rate": 4.669854865197126e-06, + "loss": 0.5517, "step": 7545 }, { - "epoch": 0.79, - "grad_norm": 3.0239588731460882, - "learning_rate": 1.0717146165376202e-06, - "loss": 0.6566, + "epoch": 0.54, + "grad_norm": 1.5625369440612968, + "learning_rate": 4.668708214784299e-06, + "loss": 0.4588, "step": 7546 }, { - "epoch": 0.79, - "grad_norm": 2.560705932085416, - "learning_rate": 1.0706604969519052e-06, - "loss": 0.6015, + "epoch": 0.54, + "grad_norm": 1.4726044404341252, + "learning_rate": 4.6675615818714295e-06, + "loss": 0.4374, "step": 7547 }, { - "epoch": 0.79, - "grad_norm": 2.331047433053311, - "learning_rate": 1.069606833865494e-06, - "loss": 0.6184, + "epoch": 0.54, + "grad_norm": 1.687053892557023, + "learning_rate": 4.666414966519084e-06, + "loss": 0.6007, "step": 7548 }, { - "epoch": 0.79, - "grad_norm": 2.347877312775048, - "learning_rate": 1.0685536274008002e-06, - "loss": 0.5979, + "epoch": 0.54, + "grad_norm": 1.7850736355110004, + "learning_rate": 4.66526836878783e-06, + "loss": 0.5397, "step": 7549 }, { - "epoch": 0.79, - "grad_norm": 2.246678913685663, - "learning_rate": 1.0675008776801804e-06, - "loss": 0.5749, + "epoch": 0.54, + "grad_norm": 1.6126145146068027, + "learning_rate": 4.664121788738233e-06, + "loss": 0.506, "step": 7550 }, { - "epoch": 0.79, - "grad_norm": 7.0685585261266795, - "learning_rate": 1.066448584825942e-06, - "loss": 0.6043, + "epoch": 0.54, + "grad_norm": 1.8616579008807657, + "learning_rate": 4.662975226430863e-06, + "loss": 0.5476, "step": 7551 }, { - "epoch": 0.79, - "grad_norm": 2.204865595293493, - "learning_rate": 1.065396748960335e-06, - "loss": 0.6199, + "epoch": 0.54, + "grad_norm": 1.9274266295411446, + "learning_rate": 4.661828681926283e-06, + "loss": 0.5581, "step": 7552 }, { - "epoch": 0.79, - "grad_norm": 2.0913674075484185, - "learning_rate": 1.06434537020556e-06, - "loss": 0.5869, + "epoch": 0.54, + "grad_norm": 2.338469162586638, + "learning_rate": 4.660682155285059e-06, + "loss": 0.5623, "step": 7553 }, { - "epoch": 0.79, - "grad_norm": 2.7478603902570047, - "learning_rate": 1.0632944486837642e-06, - "loss": 0.5817, + "epoch": 0.54, + "grad_norm": 0.7120003277831042, + "learning_rate": 4.659535646567751e-06, + "loss": 0.4514, "step": 7554 }, { - "epoch": 0.8, - "grad_norm": 2.327839999993541, - "learning_rate": 1.0622439845170385e-06, - "loss": 0.6111, + "epoch": 0.54, + "grad_norm": 2.235764161155742, + "learning_rate": 4.658389155834926e-06, + "loss": 0.656, "step": 7555 }, { - "epoch": 0.8, - "grad_norm": 3.0045359616694594, - "learning_rate": 1.061193977827425e-06, - "loss": 0.609, + "epoch": 0.54, + "grad_norm": 1.764842734061612, + "learning_rate": 4.657242683147143e-06, + "loss": 0.6434, "step": 7556 }, { - "epoch": 0.8, - "grad_norm": 2.062618698945312, - "learning_rate": 1.0601444287369073e-06, - "loss": 0.6086, + "epoch": 0.54, + "grad_norm": 1.7623744207581058, + "learning_rate": 4.6560962285649605e-06, + "loss": 0.5433, "step": 7557 }, { - "epoch": 0.8, - "grad_norm": 2.4008049526313604, - "learning_rate": 1.0590953373674229e-06, - "loss": 0.6595, + "epoch": 0.54, + "grad_norm": 1.860569038786662, + "learning_rate": 4.6549497921489434e-06, + "loss": 0.5242, "step": 7558 }, { - "epoch": 0.8, - "grad_norm": 4.1811579275109, - "learning_rate": 1.0580467038408487e-06, - "loss": 0.5707, + "epoch": 0.54, + "grad_norm": 1.910056511685819, + "learning_rate": 4.6538033739596465e-06, + "loss": 0.5789, "step": 7559 }, { - "epoch": 0.8, - "grad_norm": 2.3155227686288713, - "learning_rate": 1.0569985282790145e-06, - "loss": 0.6299, + "epoch": 0.54, + "grad_norm": 2.0076845348305072, + "learning_rate": 4.652656974057629e-06, + "loss": 0.5233, "step": 7560 }, { - "epoch": 0.8, - "grad_norm": 3.5347215291469682, - "learning_rate": 1.0559508108036926e-06, - "loss": 0.5662, + "epoch": 0.54, + "grad_norm": 1.7039585328243971, + "learning_rate": 4.651510592503445e-06, + "loss": 0.5459, "step": 7561 }, { - "epoch": 0.8, - "grad_norm": 2.007580421779567, - "learning_rate": 1.0549035515366052e-06, - "loss": 0.5217, + "epoch": 0.54, + "grad_norm": 1.7961498695469726, + "learning_rate": 4.650364229357655e-06, + "loss": 0.5605, "step": 7562 }, { - "epoch": 0.8, - "grad_norm": 3.308048869247993, - "learning_rate": 1.0538567505994175e-06, - "loss": 0.6871, + "epoch": 0.54, + "grad_norm": 2.052907591493439, + "learning_rate": 4.64921788468081e-06, + "loss": 0.5585, "step": 7563 }, { - "epoch": 0.8, - "grad_norm": 2.425142794535516, - "learning_rate": 1.052810408113746e-06, - "loss": 0.643, + "epoch": 0.54, + "grad_norm": 1.6352697779451293, + "learning_rate": 4.6480715585334656e-06, + "loss": 0.4648, "step": 7564 }, { - "epoch": 0.8, - "grad_norm": 5.639780916922588, - "learning_rate": 1.051764524201152e-06, - "loss": 0.5547, + "epoch": 0.54, + "grad_norm": 3.5300353010117553, + "learning_rate": 4.646925250976172e-06, + "loss": 0.5343, "step": 7565 }, { - "epoch": 0.8, - "grad_norm": 2.474431995538235, - "learning_rate": 1.0507190989831412e-06, - "loss": 0.632, + "epoch": 0.54, + "grad_norm": 1.7139736828437666, + "learning_rate": 4.645778962069485e-06, + "loss": 0.5419, "step": 7566 }, { - "epoch": 0.8, - "grad_norm": 3.2149333641540214, - "learning_rate": 1.0496741325811705e-06, - "loss": 0.6219, + "epoch": 0.54, + "grad_norm": 1.8073851151583031, + "learning_rate": 4.644632691873953e-06, + "loss": 0.5287, "step": 7567 }, { - "epoch": 0.8, - "grad_norm": 2.0743039032781865, - "learning_rate": 1.0486296251166383e-06, - "loss": 0.5175, + "epoch": 0.54, + "grad_norm": 1.9769091516347552, + "learning_rate": 4.6434864404501265e-06, + "loss": 0.5874, "step": 7568 }, { - "epoch": 0.8, - "grad_norm": 2.633639411660315, - "learning_rate": 1.0475855767108956e-06, - "loss": 0.6497, + "epoch": 0.54, + "grad_norm": 1.4654161779858512, + "learning_rate": 4.642340207858552e-06, + "loss": 0.5115, "step": 7569 }, { - "epoch": 0.8, - "grad_norm": 2.457721991255091, - "learning_rate": 1.0465419874852338e-06, - "loss": 0.5951, + "epoch": 0.54, + "grad_norm": 2.196747630514247, + "learning_rate": 4.641193994159783e-06, + "loss": 0.5338, "step": 7570 }, { - "epoch": 0.8, - "grad_norm": 2.239650791365524, - "learning_rate": 1.0454988575608976e-06, - "loss": 0.5779, + "epoch": 0.54, + "grad_norm": 1.7920066476461174, + "learning_rate": 4.640047799414362e-06, + "loss": 0.5317, "step": 7571 }, { - "epoch": 0.8, - "grad_norm": 3.0137305763833675, - "learning_rate": 1.0444561870590707e-06, - "loss": 0.6572, + "epoch": 0.54, + "grad_norm": 0.7390100588519941, + "learning_rate": 4.638901623682834e-06, + "loss": 0.4587, "step": 7572 }, { - "epoch": 0.8, - "grad_norm": 2.832898876677455, - "learning_rate": 1.0434139761008915e-06, - "loss": 0.592, + "epoch": 0.54, + "grad_norm": 1.6195240535306425, + "learning_rate": 4.637755467025748e-06, + "loss": 0.4743, "step": 7573 }, { - "epoch": 0.8, - "grad_norm": 2.8742909520681805, - "learning_rate": 1.042372224807438e-06, - "loss": 0.6899, + "epoch": 0.54, + "grad_norm": 1.8720107526740195, + "learning_rate": 4.636609329503645e-06, + "loss": 0.5491, "step": 7574 }, { - "epoch": 0.8, - "grad_norm": 2.538021201102594, - "learning_rate": 1.0413309332997385e-06, - "loss": 0.533, + "epoch": 0.54, + "grad_norm": 2.4409551971810557, + "learning_rate": 4.635463211177068e-06, + "loss": 0.5046, "step": 7575 }, { - "epoch": 0.8, - "grad_norm": 2.9699203259048996, - "learning_rate": 1.0402901016987694e-06, - "loss": 0.6227, + "epoch": 0.54, + "grad_norm": 2.1450486629150407, + "learning_rate": 4.634317112106559e-06, + "loss": 0.5481, "step": 7576 }, { - "epoch": 0.8, - "grad_norm": 2.0629324280519654, - "learning_rate": 1.0392497301254489e-06, - "loss": 0.5473, + "epoch": 0.54, + "grad_norm": 1.5210384682958107, + "learning_rate": 4.633171032352661e-06, + "loss": 0.552, "step": 7577 }, { - "epoch": 0.8, - "grad_norm": 2.150602467486783, - "learning_rate": 1.0382098187006463e-06, - "loss": 0.6386, + "epoch": 0.54, + "grad_norm": 1.6018971079252604, + "learning_rate": 4.632024971975912e-06, + "loss": 0.5033, "step": 7578 }, { - "epoch": 0.8, - "grad_norm": 1.012171485780843, - "learning_rate": 1.0371703675451732e-06, - "loss": 0.5258, + "epoch": 0.54, + "grad_norm": 1.5788064522351564, + "learning_rate": 4.630878931036851e-06, + "loss": 0.5311, "step": 7579 }, { - "epoch": 0.8, - "grad_norm": 2.144392113247383, - "learning_rate": 1.0361313767797932e-06, - "loss": 0.5706, + "epoch": 0.54, + "grad_norm": 1.8884491861340373, + "learning_rate": 4.629732909596013e-06, + "loss": 0.558, "step": 7580 }, { - "epoch": 0.8, - "grad_norm": 2.4348220328637216, - "learning_rate": 1.0350928465252103e-06, - "loss": 0.6039, + "epoch": 0.54, + "grad_norm": 0.6871614424468498, + "learning_rate": 4.628586907713939e-06, + "loss": 0.4484, "step": 7581 }, { - "epoch": 0.8, - "grad_norm": 2.6562323332446294, - "learning_rate": 1.0340547769020798e-06, - "loss": 0.5633, + "epoch": 0.54, + "grad_norm": 1.453696925806882, + "learning_rate": 4.627440925451164e-06, + "loss": 0.5224, "step": 7582 }, { - "epoch": 0.8, - "grad_norm": 0.9865343111730549, - "learning_rate": 1.0330171680309996e-06, - "loss": 0.5352, + "epoch": 0.54, + "grad_norm": 1.822978640784443, + "learning_rate": 4.626294962868222e-06, + "loss": 0.5722, "step": 7583 }, { - "epoch": 0.8, - "grad_norm": 3.3099068356431776, - "learning_rate": 1.0319800200325193e-06, - "loss": 0.5597, + "epoch": 0.54, + "grad_norm": 0.7509044937362566, + "learning_rate": 4.6251490200256434e-06, + "loss": 0.4414, "step": 7584 }, { - "epoch": 0.8, - "grad_norm": 2.519735515375185, - "learning_rate": 1.0309433330271285e-06, - "loss": 0.5921, + "epoch": 0.54, + "grad_norm": 1.7214892707522254, + "learning_rate": 4.624003096983966e-06, + "loss": 0.5416, "step": 7585 }, { - "epoch": 0.8, - "grad_norm": 2.187601261956032, - "learning_rate": 1.029907107135269e-06, - "loss": 0.5567, + "epoch": 0.54, + "grad_norm": 1.8441740002425562, + "learning_rate": 4.622857193803719e-06, + "loss": 0.5133, "step": 7586 }, { - "epoch": 0.8, - "grad_norm": 2.348573049735444, - "learning_rate": 1.0288713424773238e-06, - "loss": 0.6187, + "epoch": 0.54, + "grad_norm": 2.068651370457747, + "learning_rate": 4.6217113105454315e-06, + "loss": 0.4887, "step": 7587 }, { - "epoch": 0.8, - "grad_norm": 2.701801842597667, - "learning_rate": 1.027836039173627e-06, - "loss": 0.5421, + "epoch": 0.54, + "grad_norm": 1.7297711968698046, + "learning_rate": 4.620565447269636e-06, + "loss": 0.551, "step": 7588 }, { - "epoch": 0.8, - "grad_norm": 2.717130793307138, - "learning_rate": 1.026801197344458e-06, - "loss": 0.6378, + "epoch": 0.54, + "grad_norm": 2.3613889553371146, + "learning_rate": 4.619419604036858e-06, + "loss": 0.4952, "step": 7589 }, { - "epoch": 0.8, - "grad_norm": 2.3303124612250543, - "learning_rate": 1.0257668171100393e-06, - "loss": 0.6418, + "epoch": 0.54, + "grad_norm": 1.7976198310358638, + "learning_rate": 4.618273780907628e-06, + "loss": 0.5747, "step": 7590 }, { - "epoch": 0.8, - "grad_norm": 2.7662913464027623, - "learning_rate": 1.0247328985905446e-06, - "loss": 0.5441, + "epoch": 0.54, + "grad_norm": 1.869796665322123, + "learning_rate": 4.617127977942468e-06, + "loss": 0.5506, "step": 7591 }, { - "epoch": 0.8, - "grad_norm": 2.544677485406088, - "learning_rate": 1.0236994419060892e-06, - "loss": 0.568, + "epoch": 0.54, + "grad_norm": 1.5941806056331238, + "learning_rate": 4.615982195201907e-06, + "loss": 0.5724, "step": 7592 }, { - "epoch": 0.8, - "grad_norm": 2.899566327372865, - "learning_rate": 1.0226664471767401e-06, - "loss": 0.6502, + "epoch": 0.54, + "grad_norm": 1.8405161065728752, + "learning_rate": 4.614836432746468e-06, + "loss": 0.5445, "step": 7593 }, { - "epoch": 0.8, - "grad_norm": 2.506201535469169, - "learning_rate": 1.021633914522504e-06, - "loss": 0.6079, + "epoch": 0.54, + "grad_norm": 1.7760005902782898, + "learning_rate": 4.613690690636674e-06, + "loss": 0.5305, "step": 7594 }, { - "epoch": 0.8, - "grad_norm": 2.518907467485238, - "learning_rate": 1.0206018440633408e-06, - "loss": 0.5106, + "epoch": 0.54, + "grad_norm": 3.37875202238478, + "learning_rate": 4.612544968933044e-06, + "loss": 0.5059, "step": 7595 }, { - "epoch": 0.8, - "grad_norm": 2.3943093369268227, - "learning_rate": 1.0195702359191507e-06, - "loss": 0.579, + "epoch": 0.54, + "grad_norm": 1.709770939993201, + "learning_rate": 4.611399267696103e-06, + "loss": 0.5428, "step": 7596 }, { - "epoch": 0.8, - "grad_norm": 2.4145954273830927, - "learning_rate": 1.0185390902097857e-06, - "loss": 0.6422, + "epoch": 0.54, + "grad_norm": 1.8408382395978682, + "learning_rate": 4.610253586986371e-06, + "loss": 0.6028, "step": 7597 }, { - "epoch": 0.8, - "grad_norm": 1.921201826914954, - "learning_rate": 1.017508407055039e-06, - "loss": 0.6023, + "epoch": 0.54, + "grad_norm": 0.8089010793998264, + "learning_rate": 4.609107926864365e-06, + "loss": 0.4255, "step": 7598 }, { - "epoch": 0.8, - "grad_norm": 2.937076600981269, - "learning_rate": 1.0164781865746542e-06, - "loss": 0.5655, + "epoch": 0.54, + "grad_norm": 2.0895219430232768, + "learning_rate": 4.607962287390601e-06, + "loss": 0.5383, "step": 7599 }, { - "epoch": 0.8, - "grad_norm": 2.760440801599633, - "learning_rate": 1.0154484288883177e-06, - "loss": 0.6897, + "epoch": 0.54, + "grad_norm": 1.9638562906134536, + "learning_rate": 4.606816668625599e-06, + "loss": 0.5189, "step": 7600 }, { - "epoch": 0.8, - "grad_norm": 2.4696937629349898, - "learning_rate": 1.014419134115664e-06, - "loss": 0.6095, + "epoch": 0.54, + "grad_norm": 1.6023775818054875, + "learning_rate": 4.6056710706298724e-06, + "loss": 0.5142, "step": 7601 }, { - "epoch": 0.8, - "grad_norm": 4.811722355164242, - "learning_rate": 1.0133903023762758e-06, - "loss": 0.6984, + "epoch": 0.54, + "grad_norm": 4.806969880771486, + "learning_rate": 4.604525493463936e-06, + "loss": 0.5302, "step": 7602 }, { - "epoch": 0.8, - "grad_norm": 4.115947584954783, - "learning_rate": 1.0123619337896767e-06, - "loss": 0.5696, + "epoch": 0.54, + "grad_norm": 2.3056568312731347, + "learning_rate": 4.603379937188302e-06, + "loss": 0.4936, "step": 7603 }, { - "epoch": 0.8, - "grad_norm": 2.564549686867802, - "learning_rate": 1.0113340284753425e-06, - "loss": 0.655, + "epoch": 0.54, + "grad_norm": 1.6195062941307052, + "learning_rate": 4.602234401863484e-06, + "loss": 0.5233, "step": 7604 }, { - "epoch": 0.8, - "grad_norm": 12.618329274026946, - "learning_rate": 1.0103065865526895e-06, - "loss": 0.5778, + "epoch": 0.54, + "grad_norm": 1.5169836155437795, + "learning_rate": 4.601088887549994e-06, + "loss": 0.4863, "step": 7605 }, { - "epoch": 0.8, - "grad_norm": 2.8969821170985206, - "learning_rate": 1.0092796081410856e-06, - "loss": 0.5542, + "epoch": 0.54, + "grad_norm": 1.6096347028939986, + "learning_rate": 4.599943394308338e-06, + "loss": 0.5699, "step": 7606 }, { - "epoch": 0.8, - "grad_norm": 2.5199683999249087, - "learning_rate": 1.0082530933598388e-06, - "loss": 0.6197, + "epoch": 0.54, + "grad_norm": 1.4642822096499069, + "learning_rate": 4.5987979221990305e-06, + "loss": 0.5282, "step": 7607 }, { - "epoch": 0.8, - "grad_norm": 3.435215141719612, - "learning_rate": 1.0072270423282104e-06, - "loss": 0.5886, + "epoch": 0.54, + "grad_norm": 1.793618157036969, + "learning_rate": 4.597652471282575e-06, + "loss": 0.5897, "step": 7608 }, { - "epoch": 0.8, - "grad_norm": 2.1869076971904104, - "learning_rate": 1.0062014551654015e-06, - "loss": 0.5902, + "epoch": 0.54, + "grad_norm": 1.6536252474768351, + "learning_rate": 4.59650704161948e-06, + "loss": 0.5637, "step": 7609 }, { - "epoch": 0.8, - "grad_norm": 2.5157709264478054, - "learning_rate": 1.0051763319905622e-06, - "loss": 0.5485, + "epoch": 0.54, + "grad_norm": 2.728008210731531, + "learning_rate": 4.5953616332702485e-06, + "loss": 0.543, "step": 7610 }, { - "epoch": 0.8, - "grad_norm": 2.3517546543055747, - "learning_rate": 1.0041516729227902e-06, - "loss": 0.5734, + "epoch": 0.54, + "grad_norm": 2.215771238026647, + "learning_rate": 4.594216246295387e-06, + "loss": 0.5265, "step": 7611 }, { - "epoch": 0.8, - "grad_norm": 2.447965128681716, - "learning_rate": 1.0031274780811245e-06, - "loss": 0.6666, + "epoch": 0.54, + "grad_norm": 1.9059299452969218, + "learning_rate": 4.5930708807554004e-06, + "loss": 0.581, "step": 7612 }, { - "epoch": 0.8, - "grad_norm": 2.976761782472005, - "learning_rate": 1.0021037475845557e-06, - "loss": 0.5641, + "epoch": 0.54, + "grad_norm": 1.5196534652805553, + "learning_rate": 4.591925536710788e-06, + "loss": 0.44, "step": 7613 }, { - "epoch": 0.8, - "grad_norm": 2.698568711787718, - "learning_rate": 1.0010804815520159e-06, - "loss": 0.6378, + "epoch": 0.54, + "grad_norm": 1.678691175535686, + "learning_rate": 4.5907802142220494e-06, + "loss": 0.53, "step": 7614 }, { - "epoch": 0.8, - "grad_norm": 3.3363776215524092, - "learning_rate": 1.0000576801023876e-06, - "loss": 0.6686, + "epoch": 0.54, + "grad_norm": 1.600846341865788, + "learning_rate": 4.589634913349689e-06, + "loss": 0.5403, "step": 7615 }, { - "epoch": 0.8, - "grad_norm": 3.7415611338554813, - "learning_rate": 9.990353433544935e-07, - "loss": 0.6204, + "epoch": 0.54, + "grad_norm": 3.38717387211115, + "learning_rate": 4.588489634154203e-06, + "loss": 0.5351, "step": 7616 }, { - "epoch": 0.8, - "grad_norm": 3.136268007867056, - "learning_rate": 9.980134714271088e-07, - "loss": 0.7083, + "epoch": 0.54, + "grad_norm": 1.617404804983233, + "learning_rate": 4.587344376696088e-06, + "loss": 0.5559, "step": 7617 }, { - "epoch": 0.8, - "grad_norm": 2.186125465023069, - "learning_rate": 9.969920644389498e-07, - "loss": 0.583, + "epoch": 0.54, + "grad_norm": 2.3817129085341744, + "learning_rate": 4.586199141035841e-06, + "loss": 0.618, "step": 7618 }, { - "epoch": 0.8, - "grad_norm": 2.6754021896396454, - "learning_rate": 9.959711225086822e-07, - "loss": 0.6414, + "epoch": 0.54, + "grad_norm": 1.5826513125942436, + "learning_rate": 4.585053927233957e-06, + "loss": 0.5442, "step": 7619 }, { - "epoch": 0.8, - "grad_norm": 3.3008931024230703, - "learning_rate": 9.949506457549135e-07, - "loss": 0.5452, + "epoch": 0.54, + "grad_norm": 1.5619490953647914, + "learning_rate": 4.583908735350932e-06, + "loss": 0.548, "step": 7620 }, { - "epoch": 0.8, - "grad_norm": 2.6881102400726142, - "learning_rate": 9.93930634296203e-07, - "loss": 0.5804, + "epoch": 0.54, + "grad_norm": 2.2240305781925787, + "learning_rate": 4.582763565447256e-06, + "loss": 0.5641, "step": 7621 }, { - "epoch": 0.8, - "grad_norm": 2.510590511858801, - "learning_rate": 9.929110882510496e-07, - "loss": 0.5921, + "epoch": 0.54, + "grad_norm": 3.940643336011773, + "learning_rate": 4.581618417583423e-06, + "loss": 0.4832, "step": 7622 }, { - "epoch": 0.8, - "grad_norm": 2.218316814824091, - "learning_rate": 9.91892007737903e-07, - "loss": 0.6482, + "epoch": 0.54, + "grad_norm": 2.0879022804492866, + "learning_rate": 4.580473291819924e-06, + "loss": 0.5123, "step": 7623 }, { - "epoch": 0.8, - "grad_norm": 2.3200140091226107, - "learning_rate": 9.908733928751574e-07, - "loss": 0.5587, + "epoch": 0.54, + "grad_norm": 1.8394673957766323, + "learning_rate": 4.579328188217247e-06, + "loss": 0.5405, "step": 7624 }, { - "epoch": 0.8, - "grad_norm": 2.7818896360301317, - "learning_rate": 9.89855243781151e-07, - "loss": 0.6569, + "epoch": 0.54, + "grad_norm": 1.9240235134683301, + "learning_rate": 4.578183106835878e-06, + "loss": 0.5288, "step": 7625 }, { - "epoch": 0.8, - "grad_norm": 2.972728123297002, - "learning_rate": 9.888375605741713e-07, - "loss": 0.587, + "epoch": 0.54, + "grad_norm": 2.3315921907607766, + "learning_rate": 4.577038047736309e-06, + "loss": 0.4827, "step": 7626 }, { - "epoch": 0.8, - "grad_norm": 2.467181443995698, - "learning_rate": 9.87820343372447e-07, - "loss": 0.6413, + "epoch": 0.54, + "grad_norm": 3.187533379601238, + "learning_rate": 4.575893010979023e-06, + "loss": 0.5296, "step": 7627 }, { - "epoch": 0.8, - "grad_norm": 2.771873420545783, - "learning_rate": 9.868035922941594e-07, - "loss": 0.6314, + "epoch": 0.54, + "grad_norm": 1.6058530730610339, + "learning_rate": 4.574747996624506e-06, + "loss": 0.4814, "step": 7628 }, { - "epoch": 0.8, - "grad_norm": 8.073309761721186, - "learning_rate": 9.857873074574276e-07, - "loss": 0.6316, + "epoch": 0.54, + "grad_norm": 2.020645400653702, + "learning_rate": 4.573603004733238e-06, + "loss": 0.5523, "step": 7629 }, { - "epoch": 0.8, - "grad_norm": 2.3110538134158447, - "learning_rate": 9.847714889803233e-07, - "loss": 0.6234, + "epoch": 0.54, + "grad_norm": 1.689399053340462, + "learning_rate": 4.5724580353657075e-06, + "loss": 0.5511, "step": 7630 }, { - "epoch": 0.8, - "grad_norm": 2.3475214347720086, - "learning_rate": 9.8375613698086e-07, - "loss": 0.5873, + "epoch": 0.54, + "grad_norm": 2.2100330188767803, + "learning_rate": 4.571313088582392e-06, + "loss": 0.4462, "step": 7631 }, { - "epoch": 0.8, - "grad_norm": 3.338962974057601, - "learning_rate": 9.827412515770003e-07, - "loss": 0.6285, + "epoch": 0.54, + "grad_norm": 1.8245184809011534, + "learning_rate": 4.57016816444377e-06, + "loss": 0.6503, "step": 7632 }, { - "epoch": 0.8, - "grad_norm": 2.4511140443396053, - "learning_rate": 9.817268328866474e-07, - "loss": 0.5206, + "epoch": 0.54, + "grad_norm": 2.045863856493016, + "learning_rate": 4.569023263010321e-06, + "loss": 0.5093, "step": 7633 }, { - "epoch": 0.8, - "grad_norm": 2.6141500186568076, - "learning_rate": 9.807128810276573e-07, - "loss": 0.6173, + "epoch": 0.54, + "grad_norm": 1.6799133979958254, + "learning_rate": 4.567878384342526e-06, + "loss": 0.5503, "step": 7634 }, { - "epoch": 0.8, - "grad_norm": 2.6588577517347343, - "learning_rate": 9.796993961178247e-07, - "loss": 0.5779, + "epoch": 0.54, + "grad_norm": 1.9077899021463323, + "learning_rate": 4.5667335285008585e-06, + "loss": 0.5683, "step": 7635 }, { - "epoch": 0.8, - "grad_norm": 2.2544484813883354, - "learning_rate": 9.786863782748946e-07, - "loss": 0.6226, + "epoch": 0.54, + "grad_norm": 1.8564125806851104, + "learning_rate": 4.565588695545793e-06, + "loss": 0.5852, "step": 7636 }, { - "epoch": 0.8, - "grad_norm": 2.7154960137258373, - "learning_rate": 9.776738276165576e-07, - "loss": 0.5301, + "epoch": 0.54, + "grad_norm": 1.612345130815199, + "learning_rate": 4.5644438855378055e-06, + "loss": 0.522, "step": 7637 }, { - "epoch": 0.8, - "grad_norm": 2.8331620353922093, - "learning_rate": 9.76661744260447e-07, - "loss": 0.6592, + "epoch": 0.54, + "grad_norm": 1.7748344477176585, + "learning_rate": 4.563299098537368e-06, + "loss": 0.5378, "step": 7638 }, { - "epoch": 0.8, - "grad_norm": 2.8596209122623604, - "learning_rate": 9.75650128324146e-07, - "loss": 0.6592, + "epoch": 0.54, + "grad_norm": 2.0720019144953468, + "learning_rate": 4.562154334604952e-06, + "loss": 0.5841, "step": 7639 }, { - "epoch": 0.8, - "grad_norm": 0.9617449759052116, - "learning_rate": 9.746389799251783e-07, - "loss": 0.5056, + "epoch": 0.54, + "grad_norm": 1.7675219058203713, + "learning_rate": 4.561009593801027e-06, + "loss": 0.5636, "step": 7640 }, { - "epoch": 0.8, - "grad_norm": 2.6375463130140555, - "learning_rate": 9.736282991810191e-07, - "loss": 0.649, + "epoch": 0.54, + "grad_norm": 1.9478367315395415, + "learning_rate": 4.559864876186062e-06, + "loss": 0.5205, "step": 7641 }, { - "epoch": 0.8, - "grad_norm": 3.170328479172141, - "learning_rate": 9.72618086209084e-07, - "loss": 0.6085, + "epoch": 0.54, + "grad_norm": 1.5797460886108745, + "learning_rate": 4.558720181820527e-06, + "loss": 0.5207, "step": 7642 }, { - "epoch": 0.8, - "grad_norm": 2.0826307503241073, - "learning_rate": 9.71608341126739e-07, - "loss": 0.5132, + "epoch": 0.54, + "grad_norm": 1.9834704958813896, + "learning_rate": 4.557575510764888e-06, + "loss": 0.6434, "step": 7643 }, { - "epoch": 0.8, - "grad_norm": 5.426019291056326, - "learning_rate": 9.705990640512909e-07, - "loss": 0.5804, + "epoch": 0.54, + "grad_norm": 1.7054655715297211, + "learning_rate": 4.5564308630796075e-06, + "loss": 0.5256, "step": 7644 }, { - "epoch": 0.8, - "grad_norm": 2.8657766408804495, - "learning_rate": 9.695902550999953e-07, - "loss": 0.6634, + "epoch": 0.54, + "grad_norm": 1.847900584734915, + "learning_rate": 4.555286238825154e-06, + "loss": 0.5561, "step": 7645 }, { - "epoch": 0.8, - "grad_norm": 2.1192272871905335, - "learning_rate": 9.685819143900544e-07, - "loss": 0.575, + "epoch": 0.54, + "grad_norm": 1.9479305743045578, + "learning_rate": 4.554141638061987e-06, + "loss": 0.5458, "step": 7646 }, { - "epoch": 0.8, - "grad_norm": 2.9730510980964873, - "learning_rate": 9.675740420386132e-07, - "loss": 0.6602, + "epoch": 0.54, + "grad_norm": 1.631118807804268, + "learning_rate": 4.5529970608505695e-06, + "loss": 0.4943, "step": 7647 }, { - "epoch": 0.8, - "grad_norm": 2.8204604472554924, - "learning_rate": 9.66566638162762e-07, - "loss": 0.6495, + "epoch": 0.54, + "grad_norm": 2.0575653056875907, + "learning_rate": 4.551852507251361e-06, + "loss": 0.54, "step": 7648 }, { - "epoch": 0.8, - "grad_norm": 2.539741388640035, - "learning_rate": 9.655597028795394e-07, - "loss": 0.6173, + "epoch": 0.54, + "grad_norm": 2.136427946294261, + "learning_rate": 4.550707977324823e-06, + "loss": 0.4625, "step": 7649 }, { - "epoch": 0.81, - "grad_norm": 2.5656949751155484, - "learning_rate": 9.64553236305929e-07, - "loss": 0.6095, + "epoch": 0.54, + "grad_norm": 1.939139534726499, + "learning_rate": 4.549563471131412e-06, + "loss": 0.5649, "step": 7650 }, { - "epoch": 0.81, - "grad_norm": 2.450376476628125, - "learning_rate": 9.635472385588573e-07, - "loss": 0.5739, + "epoch": 0.54, + "grad_norm": 1.800416644486573, + "learning_rate": 4.548418988731585e-06, + "loss": 0.5431, "step": 7651 }, { - "epoch": 0.81, - "grad_norm": 2.6482970996607156, - "learning_rate": 9.625417097552003e-07, - "loss": 0.6081, + "epoch": 0.54, + "grad_norm": 2.10889282232532, + "learning_rate": 4.547274530185795e-06, + "loss": 0.4946, "step": 7652 }, { - "epoch": 0.81, - "grad_norm": 2.6667414141134174, - "learning_rate": 9.615366500117757e-07, - "loss": 0.6578, + "epoch": 0.54, + "grad_norm": 1.6960690553671018, + "learning_rate": 4.546130095554501e-06, + "loss": 0.5471, "step": 7653 }, { - "epoch": 0.81, - "grad_norm": 3.74349389755696, - "learning_rate": 9.6053205944535e-07, - "loss": 0.5548, + "epoch": 0.54, + "grad_norm": 2.064963995183518, + "learning_rate": 4.544985684898151e-06, + "loss": 0.4973, "step": 7654 }, { - "epoch": 0.81, - "grad_norm": 3.010896829422771, - "learning_rate": 9.595279381726308e-07, - "loss": 0.5475, + "epoch": 0.54, + "grad_norm": 2.1727105502548274, + "learning_rate": 4.543841298277201e-06, + "loss": 0.5078, "step": 7655 }, { - "epoch": 0.81, - "grad_norm": 2.4405802174170725, - "learning_rate": 9.58524286310278e-07, - "loss": 0.6354, + "epoch": 0.54, + "grad_norm": 1.6977705730668737, + "learning_rate": 4.542696935752098e-06, + "loss": 0.5315, "step": 7656 }, { - "epoch": 0.81, - "grad_norm": 4.919810726968023, - "learning_rate": 9.575211039748893e-07, - "loss": 0.5448, + "epoch": 0.54, + "grad_norm": 0.7393213659233616, + "learning_rate": 4.541552597383293e-06, + "loss": 0.4467, "step": 7657 }, { - "epoch": 0.81, - "grad_norm": 2.363605925097881, - "learning_rate": 9.565183912830134e-07, - "loss": 0.6191, + "epoch": 0.54, + "grad_norm": 1.5177484012035631, + "learning_rate": 4.540408283231234e-06, + "loss": 0.4986, "step": 7658 }, { - "epoch": 0.81, - "grad_norm": 2.4814246714387385, - "learning_rate": 9.555161483511434e-07, - "loss": 0.6056, + "epoch": 0.54, + "grad_norm": 1.6127223065519982, + "learning_rate": 4.539263993356365e-06, + "loss": 0.5877, "step": 7659 }, { - "epoch": 0.81, - "grad_norm": 2.3442144571403394, - "learning_rate": 9.545143752957143e-07, - "loss": 0.5799, + "epoch": 0.54, + "grad_norm": 1.9509470657316377, + "learning_rate": 4.538119727819133e-06, + "loss": 0.494, "step": 7660 }, { - "epoch": 0.81, - "grad_norm": 0.9844963618684487, - "learning_rate": 9.535130722331121e-07, - "loss": 0.5501, + "epoch": 0.54, + "grad_norm": 1.8727293336229307, + "learning_rate": 4.536975486679983e-06, + "loss": 0.4461, "step": 7661 }, { - "epoch": 0.81, - "grad_norm": 3.841148309195625, - "learning_rate": 9.525122392796632e-07, - "loss": 0.5798, + "epoch": 0.54, + "grad_norm": 1.6301176566985953, + "learning_rate": 4.535831269999358e-06, + "loss": 0.5677, "step": 7662 }, { - "epoch": 0.81, - "grad_norm": 2.6946034136889114, - "learning_rate": 9.515118765516429e-07, - "loss": 0.624, + "epoch": 0.54, + "grad_norm": 1.692768504940511, + "learning_rate": 4.534687077837695e-06, + "loss": 0.539, "step": 7663 }, { - "epoch": 0.81, - "grad_norm": 2.614312745110826, - "learning_rate": 9.505119841652688e-07, - "loss": 0.6012, + "epoch": 0.54, + "grad_norm": 1.4946616473925367, + "learning_rate": 4.5335429102554405e-06, + "loss": 0.5315, "step": 7664 }, { - "epoch": 0.81, - "grad_norm": 2.2341716999178733, - "learning_rate": 9.495125622367079e-07, - "loss": 0.699, + "epoch": 0.54, + "grad_norm": 1.4675399284294024, + "learning_rate": 4.532398767313029e-06, + "loss": 0.4846, "step": 7665 }, { - "epoch": 0.81, - "grad_norm": 2.454492464172891, - "learning_rate": 9.485136108820675e-07, - "loss": 0.5752, + "epoch": 0.54, + "grad_norm": 0.7127809761190446, + "learning_rate": 4.5312546490708995e-06, + "loss": 0.4382, "step": 7666 }, { - "epoch": 0.81, - "grad_norm": 2.7299044786206266, - "learning_rate": 9.475151302174052e-07, - "loss": 0.6267, + "epoch": 0.54, + "grad_norm": 2.2180101341416854, + "learning_rate": 4.530110555589485e-06, + "loss": 0.5019, "step": 7667 }, { - "epoch": 0.81, - "grad_norm": 4.46712121179473, - "learning_rate": 9.465171203587192e-07, - "loss": 0.69, + "epoch": 0.54, + "grad_norm": 1.8957032705876284, + "learning_rate": 4.5289664869292264e-06, + "loss": 0.4855, "step": 7668 }, { - "epoch": 0.81, - "grad_norm": 2.8581757438763984, - "learning_rate": 9.455195814219581e-07, - "loss": 0.6329, + "epoch": 0.54, + "grad_norm": 2.1571479308015373, + "learning_rate": 4.527822443150552e-06, + "loss": 0.5095, "step": 7669 }, { - "epoch": 0.81, - "grad_norm": 2.7749337082880636, - "learning_rate": 9.445225135230102e-07, - "loss": 0.5863, + "epoch": 0.54, + "grad_norm": 0.6686081120272963, + "learning_rate": 4.526678424313896e-06, + "loss": 0.4399, "step": 7670 }, { - "epoch": 0.81, - "grad_norm": 2.9200575440191554, - "learning_rate": 9.435259167777139e-07, - "loss": 0.6594, + "epoch": 0.54, + "grad_norm": 2.0223543972476343, + "learning_rate": 4.525534430479691e-06, + "loss": 0.5347, "step": 7671 }, { - "epoch": 0.81, - "grad_norm": 3.1961551877877343, - "learning_rate": 9.425297913018517e-07, - "loss": 0.6262, + "epoch": 0.54, + "grad_norm": 1.791821063008179, + "learning_rate": 4.524390461708365e-06, + "loss": 0.5368, "step": 7672 }, { - "epoch": 0.81, - "grad_norm": 5.621987087951769, - "learning_rate": 9.41534137211148e-07, - "loss": 0.6806, + "epoch": 0.54, + "grad_norm": 1.79738386544397, + "learning_rate": 4.523246518060346e-06, + "loss": 0.5381, "step": 7673 }, { - "epoch": 0.81, - "grad_norm": 2.37550460624206, - "learning_rate": 9.405389546212779e-07, - "loss": 0.4877, + "epoch": 0.54, + "grad_norm": 1.6233618135651613, + "learning_rate": 4.52210259959606e-06, + "loss": 0.5987, "step": 7674 }, { - "epoch": 0.81, - "grad_norm": 2.1856374832479073, - "learning_rate": 9.395442436478558e-07, - "loss": 0.6366, + "epoch": 0.54, + "grad_norm": 1.5249505168901318, + "learning_rate": 4.520958706375936e-06, + "loss": 0.5532, "step": 7675 }, { - "epoch": 0.81, - "grad_norm": 2.3120929072875986, - "learning_rate": 9.385500044064472e-07, - "loss": 0.5728, + "epoch": 0.54, + "grad_norm": 1.9569769009766622, + "learning_rate": 4.519814838460395e-06, + "loss": 0.562, "step": 7676 }, { - "epoch": 0.81, - "grad_norm": 11.267589648078419, - "learning_rate": 9.375562370125574e-07, - "loss": 0.6151, + "epoch": 0.54, + "grad_norm": 1.8764533455404855, + "learning_rate": 4.518670995909862e-06, + "loss": 0.546, "step": 7677 }, { - "epoch": 0.81, - "grad_norm": 3.3911562826517248, - "learning_rate": 9.365629415816418e-07, - "loss": 0.5745, + "epoch": 0.54, + "grad_norm": 1.6636347110932128, + "learning_rate": 4.517527178784756e-06, + "loss": 0.543, "step": 7678 }, { - "epoch": 0.81, - "grad_norm": 2.0931728619715138, - "learning_rate": 9.355701182290961e-07, - "loss": 0.5957, + "epoch": 0.54, + "grad_norm": 2.6867445464250483, + "learning_rate": 4.516383387145503e-06, + "loss": 0.5423, "step": 7679 }, { - "epoch": 0.81, - "grad_norm": 2.4056655342238416, - "learning_rate": 9.345777670702649e-07, - "loss": 0.6034, + "epoch": 0.54, + "grad_norm": 0.7648375856884928, + "learning_rate": 4.515239621052516e-06, + "loss": 0.4537, "step": 7680 }, { - "epoch": 0.81, - "grad_norm": 3.31101973582715, - "learning_rate": 9.335858882204385e-07, - "loss": 0.5717, + "epoch": 0.55, + "grad_norm": 1.8285481330077449, + "learning_rate": 4.514095880566216e-06, + "loss": 0.4842, "step": 7681 }, { - "epoch": 0.81, - "grad_norm": 3.2633888627628336, - "learning_rate": 9.325944817948485e-07, - "loss": 0.6403, + "epoch": 0.55, + "grad_norm": 1.7989338050923456, + "learning_rate": 4.512952165747016e-06, + "loss": 0.5517, "step": 7682 }, { - "epoch": 0.81, - "grad_norm": 2.507966888011093, - "learning_rate": 9.316035479086727e-07, - "loss": 0.6023, + "epoch": 0.55, + "grad_norm": 1.7144875513927733, + "learning_rate": 4.5118084766553346e-06, + "loss": 0.5214, "step": 7683 }, { - "epoch": 0.81, - "grad_norm": 2.6707445301357104, - "learning_rate": 9.306130866770364e-07, - "loss": 0.694, + "epoch": 0.55, + "grad_norm": 1.8973409318100385, + "learning_rate": 4.510664813351583e-06, + "loss": 0.5293, "step": 7684 }, { - "epoch": 0.81, - "grad_norm": 2.3698584587195777, - "learning_rate": 9.296230982150095e-07, - "loss": 0.499, + "epoch": 0.55, + "grad_norm": 1.6250332793448201, + "learning_rate": 4.509521175896173e-06, + "loss": 0.5387, "step": 7685 }, { - "epoch": 0.81, - "grad_norm": 2.646352948765854, - "learning_rate": 9.28633582637603e-07, - "loss": 0.5998, + "epoch": 0.55, + "grad_norm": 1.7591710891050234, + "learning_rate": 4.508377564349518e-06, + "loss": 0.5731, "step": 7686 }, { - "epoch": 0.81, - "grad_norm": 2.3308088558522724, - "learning_rate": 9.276445400597795e-07, - "loss": 0.6442, + "epoch": 0.55, + "grad_norm": 2.052363344664565, + "learning_rate": 4.5072339787720265e-06, + "loss": 0.5895, "step": 7687 }, { - "epoch": 0.81, - "grad_norm": 2.7184765380374314, - "learning_rate": 9.266559705964401e-07, - "loss": 0.7093, + "epoch": 0.55, + "grad_norm": 1.6699829017765178, + "learning_rate": 4.5060904192241055e-06, + "loss": 0.5004, "step": 7688 }, { - "epoch": 0.81, - "grad_norm": 2.623044957668708, - "learning_rate": 9.256678743624364e-07, - "loss": 0.6417, + "epoch": 0.55, + "grad_norm": 2.2388971716545374, + "learning_rate": 4.50494688576616e-06, + "loss": 0.4857, "step": 7689 }, { - "epoch": 0.81, - "grad_norm": 5.079400937660165, - "learning_rate": 9.246802514725601e-07, - "loss": 0.6116, + "epoch": 0.55, + "grad_norm": 1.7273187411995266, + "learning_rate": 4.503803378458601e-06, + "loss": 0.4742, "step": 7690 }, { - "epoch": 0.81, - "grad_norm": 2.2731708710443503, - "learning_rate": 9.236931020415529e-07, - "loss": 0.5756, + "epoch": 0.55, + "grad_norm": 1.6367435543000965, + "learning_rate": 4.502659897361826e-06, + "loss": 0.5364, "step": 7691 }, { - "epoch": 0.81, - "grad_norm": 3.3136227736096924, - "learning_rate": 9.227064261840962e-07, - "loss": 0.6093, + "epoch": 0.55, + "grad_norm": 1.473924780610634, + "learning_rate": 4.501516442536242e-06, + "loss": 0.5576, "step": 7692 }, { - "epoch": 0.81, - "grad_norm": 2.1475285005337974, - "learning_rate": 9.21720224014821e-07, - "loss": 0.6814, + "epoch": 0.55, + "grad_norm": 2.1117095342295813, + "learning_rate": 4.500373014042247e-06, + "loss": 0.5642, "step": 7693 }, { - "epoch": 0.81, - "grad_norm": 2.9968897146706133, - "learning_rate": 9.207344956483022e-07, - "loss": 0.6033, + "epoch": 0.55, + "grad_norm": 1.688114138860154, + "learning_rate": 4.499229611940244e-06, + "loss": 0.5667, "step": 7694 }, { - "epoch": 0.81, - "grad_norm": 2.6350679079149866, - "learning_rate": 9.197492411990571e-07, - "loss": 0.626, + "epoch": 0.55, + "grad_norm": 2.164235412003158, + "learning_rate": 4.4980862362906295e-06, + "loss": 0.5057, "step": 7695 }, { - "epoch": 0.81, - "grad_norm": 2.72376089543527, - "learning_rate": 9.187644607815499e-07, - "loss": 0.6423, + "epoch": 0.55, + "grad_norm": 1.792499307726391, + "learning_rate": 4.496942887153802e-06, + "loss": 0.5301, "step": 7696 }, { - "epoch": 0.81, - "grad_norm": 2.8530684492710217, - "learning_rate": 9.1778015451019e-07, - "loss": 0.6458, + "epoch": 0.55, + "grad_norm": 1.637483835497282, + "learning_rate": 4.495799564590152e-06, + "loss": 0.518, "step": 7697 }, { - "epoch": 0.81, - "grad_norm": 2.0539321212137107, - "learning_rate": 9.16796322499332e-07, - "loss": 0.6108, + "epoch": 0.55, + "grad_norm": 0.7759101118563858, + "learning_rate": 4.494656268660081e-06, + "loss": 0.4614, "step": 7698 }, { - "epoch": 0.81, - "grad_norm": 2.7055060163420066, - "learning_rate": 9.15812964863273e-07, - "loss": 0.5557, + "epoch": 0.55, + "grad_norm": 1.5928222189726307, + "learning_rate": 4.493512999423976e-06, + "loss": 0.527, "step": 7699 }, { - "epoch": 0.81, - "grad_norm": 2.334028049190322, - "learning_rate": 9.148300817162587e-07, - "loss": 0.5686, + "epoch": 0.55, + "grad_norm": 1.5662509683234036, + "learning_rate": 4.4923697569422305e-06, + "loss": 0.5529, "step": 7700 }, { - "epoch": 0.81, - "grad_norm": 0.8483172248492421, - "learning_rate": 9.138476731724749e-07, - "loss": 0.5497, + "epoch": 0.55, + "grad_norm": 1.8520892995384461, + "learning_rate": 4.491226541275236e-06, + "loss": 0.5561, "step": 7701 }, { - "epoch": 0.81, - "grad_norm": 2.586655116930661, - "learning_rate": 9.128657393460583e-07, - "loss": 0.5979, + "epoch": 0.55, + "grad_norm": 2.0865850017145178, + "learning_rate": 4.49008335248338e-06, + "loss": 0.5837, "step": 7702 }, { - "epoch": 0.81, - "grad_norm": 3.314047969656968, - "learning_rate": 9.118842803510841e-07, - "loss": 0.6292, + "epoch": 0.55, + "grad_norm": 1.4921219983983958, + "learning_rate": 4.488940190627049e-06, + "loss": 0.5278, "step": 7703 }, { - "epoch": 0.81, - "grad_norm": 2.410214810373217, - "learning_rate": 9.10903296301578e-07, - "loss": 0.6381, + "epoch": 0.55, + "grad_norm": 1.8852311466559046, + "learning_rate": 4.487797055766626e-06, + "loss": 0.5773, "step": 7704 }, { - "epoch": 0.81, - "grad_norm": 3.104473643611443, - "learning_rate": 9.099227873115047e-07, - "loss": 0.6241, + "epoch": 0.55, + "grad_norm": 1.5774348349540475, + "learning_rate": 4.486653947962501e-06, + "loss": 0.4948, "step": 7705 }, { - "epoch": 0.81, - "grad_norm": 3.4899505415847676, - "learning_rate": 9.089427534947792e-07, - "loss": 0.6409, + "epoch": 0.55, + "grad_norm": 2.174435118082163, + "learning_rate": 4.485510867275052e-06, + "loss": 0.5067, "step": 7706 }, { - "epoch": 0.81, - "grad_norm": 2.1792155269408617, - "learning_rate": 9.0796319496526e-07, - "loss": 0.6243, + "epoch": 0.55, + "grad_norm": 1.9398375378860961, + "learning_rate": 4.484367813764665e-06, + "loss": 0.5453, "step": 7707 }, { - "epoch": 0.81, - "grad_norm": 2.648707594215365, - "learning_rate": 9.069841118367462e-07, - "loss": 0.6185, + "epoch": 0.55, + "grad_norm": 2.2664044771741256, + "learning_rate": 4.4832247874917136e-06, + "loss": 0.4743, "step": 7708 }, { - "epoch": 0.81, - "grad_norm": 2.801759382084992, - "learning_rate": 9.060055042229881e-07, - "loss": 0.6844, + "epoch": 0.55, + "grad_norm": 1.8430589181201231, + "learning_rate": 4.482081788516584e-06, + "loss": 0.5341, "step": 7709 }, { - "epoch": 0.81, - "grad_norm": 2.8450393356650685, - "learning_rate": 9.050273722376746e-07, - "loss": 0.535, + "epoch": 0.55, + "grad_norm": 1.5072951943065, + "learning_rate": 4.480938816899647e-06, + "loss": 0.4804, "step": 7710 }, { - "epoch": 0.81, - "grad_norm": 3.237580480646928, - "learning_rate": 9.04049715994445e-07, - "loss": 0.6118, + "epoch": 0.55, + "grad_norm": 1.6013337324441983, + "learning_rate": 4.479795872701282e-06, + "loss": 0.4927, "step": 7711 }, { - "epoch": 0.81, - "grad_norm": 4.029884410505219, - "learning_rate": 9.030725356068781e-07, - "loss": 0.642, + "epoch": 0.55, + "grad_norm": 1.635300689955287, + "learning_rate": 4.478652955981858e-06, + "loss": 0.5702, "step": 7712 }, { - "epoch": 0.81, - "grad_norm": 2.5945698031312086, - "learning_rate": 9.020958311885019e-07, - "loss": 0.6349, + "epoch": 0.55, + "grad_norm": 1.9098950651326774, + "learning_rate": 4.4775100668017554e-06, + "loss": 0.5955, "step": 7713 }, { - "epoch": 0.81, - "grad_norm": 2.455581536260283, - "learning_rate": 9.011196028527853e-07, - "loss": 0.6013, + "epoch": 0.55, + "grad_norm": 1.5541605174661794, + "learning_rate": 4.4763672052213394e-06, + "loss": 0.5604, "step": 7714 }, { - "epoch": 0.81, - "grad_norm": 0.9942779258867724, - "learning_rate": 9.001438507131444e-07, - "loss": 0.527, + "epoch": 0.55, + "grad_norm": 1.9236072208088693, + "learning_rate": 4.475224371300983e-06, + "loss": 0.5446, "step": 7715 }, { - "epoch": 0.81, - "grad_norm": 2.700359868586655, - "learning_rate": 8.991685748829404e-07, - "loss": 0.6477, + "epoch": 0.55, + "grad_norm": 1.7139521246929879, + "learning_rate": 4.474081565101053e-06, + "loss": 0.4822, "step": 7716 }, { - "epoch": 0.81, - "grad_norm": 2.104403745631631, - "learning_rate": 8.981937754754777e-07, - "loss": 0.6163, + "epoch": 0.55, + "grad_norm": 1.7102231859800985, + "learning_rate": 4.4729387866819164e-06, + "loss": 0.5394, "step": 7717 }, { - "epoch": 0.81, - "grad_norm": 2.5972794158820482, - "learning_rate": 8.972194526040034e-07, - "loss": 0.5271, + "epoch": 0.55, + "grad_norm": 5.959213103141516, + "learning_rate": 4.471796036103941e-06, + "loss": 0.5575, "step": 7718 }, { - "epoch": 0.81, - "grad_norm": 2.4719633270189583, - "learning_rate": 8.962456063817132e-07, - "loss": 0.6623, + "epoch": 0.55, + "grad_norm": 1.648044139865756, + "learning_rate": 4.4706533134274854e-06, + "loss": 0.4833, "step": 7719 }, { - "epoch": 0.81, - "grad_norm": 0.9667484827218678, - "learning_rate": 8.95272236921747e-07, - "loss": 0.5675, + "epoch": 0.55, + "grad_norm": 1.7298676704751736, + "learning_rate": 4.469510618712918e-06, + "loss": 0.5621, "step": 7720 }, { - "epoch": 0.81, - "grad_norm": 2.425564331369107, - "learning_rate": 8.942993443371856e-07, - "loss": 0.609, + "epoch": 0.55, + "grad_norm": 1.770033057170539, + "learning_rate": 4.4683679520205965e-06, + "loss": 0.5338, "step": 7721 }, { - "epoch": 0.81, - "grad_norm": 2.947734739772372, - "learning_rate": 8.93326928741059e-07, - "loss": 0.6371, + "epoch": 0.55, + "grad_norm": 1.7800747116793412, + "learning_rate": 4.467225313410882e-06, + "loss": 0.5369, "step": 7722 }, { - "epoch": 0.81, - "grad_norm": 2.9080706900479045, - "learning_rate": 8.923549902463374e-07, - "loss": 0.6628, + "epoch": 0.55, + "grad_norm": 2.2214112758991145, + "learning_rate": 4.46608270294413e-06, + "loss": 0.4733, "step": 7723 }, { - "epoch": 0.81, - "grad_norm": 3.8252938892041635, - "learning_rate": 8.913835289659406e-07, - "loss": 0.5489, + "epoch": 0.55, + "grad_norm": 1.8503249784974822, + "learning_rate": 4.4649401206807e-06, + "loss": 0.5322, "step": 7724 }, { - "epoch": 0.81, - "grad_norm": 2.5458974669966805, - "learning_rate": 8.904125450127272e-07, - "loss": 0.6616, + "epoch": 0.55, + "grad_norm": 1.762466857644363, + "learning_rate": 4.463797566680946e-06, + "loss": 0.5468, "step": 7725 }, { - "epoch": 0.81, - "grad_norm": 2.184591112903663, - "learning_rate": 8.894420384995056e-07, - "loss": 0.6075, + "epoch": 0.55, + "grad_norm": 2.0072220912952816, + "learning_rate": 4.462655041005222e-06, + "loss": 0.6015, "step": 7726 }, { - "epoch": 0.81, - "grad_norm": 2.9011418508765647, - "learning_rate": 8.884720095390248e-07, - "loss": 0.6888, + "epoch": 0.55, + "grad_norm": 1.75281044662948, + "learning_rate": 4.4615125437138775e-06, + "loss": 0.5198, "step": 7727 }, { - "epoch": 0.81, - "grad_norm": 2.454534274847728, - "learning_rate": 8.875024582439801e-07, - "loss": 0.6525, + "epoch": 0.55, + "grad_norm": 3.0596949222714342, + "learning_rate": 4.460370074867265e-06, + "loss": 0.5279, "step": 7728 }, { - "epoch": 0.81, - "grad_norm": 2.3930270904791335, - "learning_rate": 8.865333847270135e-07, - "loss": 0.6685, + "epoch": 0.55, + "grad_norm": 2.0054057441784052, + "learning_rate": 4.459227634525736e-06, + "loss": 0.5499, "step": 7729 }, { - "epoch": 0.81, - "grad_norm": 2.5600293308303783, - "learning_rate": 8.855647891007075e-07, - "loss": 0.5978, + "epoch": 0.55, + "grad_norm": 1.839333235657605, + "learning_rate": 4.458085222749636e-06, + "loss": 0.5667, "step": 7730 }, { - "epoch": 0.81, - "grad_norm": 2.437056971277586, - "learning_rate": 8.845966714775894e-07, - "loss": 0.6665, + "epoch": 0.55, + "grad_norm": 2.318173791014407, + "learning_rate": 4.456942839599308e-06, + "loss": 0.5696, "step": 7731 }, { - "epoch": 0.81, - "grad_norm": 21.97578049995487, - "learning_rate": 8.836290319701335e-07, - "loss": 0.5903, + "epoch": 0.55, + "grad_norm": 2.2521098044487142, + "learning_rate": 4.4558004851351015e-06, + "loss": 0.4785, "step": 7732 }, { - "epoch": 0.81, - "grad_norm": 2.878994513782418, - "learning_rate": 8.826618706907585e-07, - "loss": 0.6547, + "epoch": 0.55, + "grad_norm": 1.7663475032848635, + "learning_rate": 4.454658159417356e-06, + "loss": 0.5022, "step": 7733 }, { - "epoch": 0.81, - "grad_norm": 2.6089834334746893, - "learning_rate": 8.816951877518243e-07, - "loss": 0.6462, + "epoch": 0.55, + "grad_norm": 1.7338690966588735, + "learning_rate": 4.453515862506414e-06, + "loss": 0.5425, "step": 7734 }, { - "epoch": 0.81, - "grad_norm": 3.5891885718164533, - "learning_rate": 8.807289832656396e-07, - "loss": 0.6704, + "epoch": 0.55, + "grad_norm": 1.9360545917811034, + "learning_rate": 4.452373594462618e-06, + "loss": 0.6101, "step": 7735 }, { - "epoch": 0.81, - "grad_norm": 2.53450443321136, - "learning_rate": 8.797632573444526e-07, - "loss": 0.5234, + "epoch": 0.55, + "grad_norm": 2.0464884826813248, + "learning_rate": 4.451231355346303e-06, + "loss": 0.5643, "step": 7736 }, { - "epoch": 0.81, - "grad_norm": 2.4367976647015843, - "learning_rate": 8.787980101004612e-07, - "loss": 0.5515, + "epoch": 0.55, + "grad_norm": 1.5005504207468288, + "learning_rate": 4.450089145217808e-06, + "loss": 0.5102, "step": 7737 }, { - "epoch": 0.81, - "grad_norm": 3.393038737753617, - "learning_rate": 8.77833241645803e-07, - "loss": 0.6213, + "epoch": 0.55, + "grad_norm": 1.727017729026395, + "learning_rate": 4.448946964137465e-06, + "loss": 0.5181, "step": 7738 }, { - "epoch": 0.81, - "grad_norm": 2.942620580800737, - "learning_rate": 8.768689520925638e-07, - "loss": 0.5855, + "epoch": 0.55, + "grad_norm": 1.6387893905382804, + "learning_rate": 4.4478048121656145e-06, + "loss": 0.6241, "step": 7739 }, { - "epoch": 0.81, - "grad_norm": 2.2895413377993816, - "learning_rate": 8.759051415527697e-07, - "loss": 0.6244, + "epoch": 0.55, + "grad_norm": 2.48777931300927, + "learning_rate": 4.446662689362583e-06, + "loss": 0.5425, "step": 7740 }, { - "epoch": 0.81, - "grad_norm": 3.1158010305913275, - "learning_rate": 8.749418101383944e-07, - "loss": 0.5865, + "epoch": 0.55, + "grad_norm": 2.4402385396404056, + "learning_rate": 4.445520595788705e-06, + "loss": 0.5177, "step": 7741 }, { - "epoch": 0.81, - "grad_norm": 2.6974129522038877, - "learning_rate": 8.739789579613572e-07, - "loss": 0.5408, + "epoch": 0.55, + "grad_norm": 1.9916151747940631, + "learning_rate": 4.444378531504304e-06, + "loss": 0.5302, "step": 7742 }, { - "epoch": 0.81, - "grad_norm": 2.895780450687752, - "learning_rate": 8.73016585133517e-07, - "loss": 0.6004, + "epoch": 0.55, + "grad_norm": 0.802666306985771, + "learning_rate": 4.443236496569714e-06, + "loss": 0.4505, "step": 7743 }, { - "epoch": 0.81, - "grad_norm": 3.0182236513579874, - "learning_rate": 8.720546917666789e-07, - "loss": 0.6118, + "epoch": 0.55, + "grad_norm": 0.7244211009866435, + "learning_rate": 4.442094491045259e-06, + "loss": 0.4403, "step": 7744 }, { - "epoch": 0.82, - "grad_norm": 2.370869512524687, - "learning_rate": 8.710932779725939e-07, - "loss": 0.6114, + "epoch": 0.55, + "grad_norm": 3.726832690020085, + "learning_rate": 4.440952514991265e-06, + "loss": 0.5703, "step": 7745 }, { - "epoch": 0.82, - "grad_norm": 2.3023012937961846, - "learning_rate": 8.701323438629577e-07, - "loss": 0.5891, + "epoch": 0.55, + "grad_norm": 1.6716270583887567, + "learning_rate": 4.43981056846805e-06, + "loss": 0.5646, "step": 7746 }, { - "epoch": 0.82, - "grad_norm": 2.7894697251608, - "learning_rate": 8.691718895494067e-07, - "loss": 0.5835, + "epoch": 0.55, + "grad_norm": 2.3255347577093994, + "learning_rate": 4.438668651535941e-06, + "loss": 0.5496, "step": 7747 }, { - "epoch": 0.82, - "grad_norm": 2.2253869804344077, - "learning_rate": 8.682119151435258e-07, - "loss": 0.657, + "epoch": 0.55, + "grad_norm": 3.846344863679638, + "learning_rate": 4.437526764255256e-06, + "loss": 0.5129, "step": 7748 }, { - "epoch": 0.82, - "grad_norm": 2.1968761019330536, - "learning_rate": 8.672524207568389e-07, - "loss": 0.5731, + "epoch": 0.55, + "grad_norm": 1.7090142254369698, + "learning_rate": 4.4363849066863115e-06, + "loss": 0.5313, "step": 7749 }, { - "epoch": 0.82, - "grad_norm": 2.0451951033529774, - "learning_rate": 8.6629340650082e-07, - "loss": 0.6249, + "epoch": 0.55, + "grad_norm": 2.232062103018914, + "learning_rate": 4.435243078889429e-06, + "loss": 0.4953, "step": 7750 }, { - "epoch": 0.82, - "grad_norm": 5.290396646074176, - "learning_rate": 8.653348724868843e-07, - "loss": 0.6095, + "epoch": 0.55, + "grad_norm": 2.3528487444536808, + "learning_rate": 4.4341012809249185e-06, + "loss": 0.6096, "step": 7751 }, { - "epoch": 0.82, - "grad_norm": 2.4998002085053828, - "learning_rate": 8.643768188263918e-07, - "loss": 0.5901, + "epoch": 0.55, + "grad_norm": 1.5811698978533302, + "learning_rate": 4.432959512853096e-06, + "loss": 0.5278, "step": 7752 }, { - "epoch": 0.82, - "grad_norm": 2.927706275807229, - "learning_rate": 8.63419245630644e-07, - "loss": 0.6667, + "epoch": 0.55, + "grad_norm": 1.5394815631865082, + "learning_rate": 4.431817774734273e-06, + "loss": 0.5717, "step": 7753 }, { - "epoch": 0.82, - "grad_norm": 2.514050334470586, - "learning_rate": 8.624621530108901e-07, - "loss": 0.5498, + "epoch": 0.55, + "grad_norm": 0.7537684392176923, + "learning_rate": 4.430676066628763e-06, + "loss": 0.4563, "step": 7754 }, { - "epoch": 0.82, - "grad_norm": 2.00269176738163, - "learning_rate": 8.615055410783246e-07, - "loss": 0.5119, + "epoch": 0.55, + "grad_norm": 1.6357750179241812, + "learning_rate": 4.4295343885968706e-06, + "loss": 0.5029, "step": 7755 }, { - "epoch": 0.82, - "grad_norm": 2.2181227482346895, - "learning_rate": 8.605494099440808e-07, - "loss": 0.6086, + "epoch": 0.55, + "grad_norm": 1.5995602443279022, + "learning_rate": 4.428392740698905e-06, + "loss": 0.4934, "step": 7756 }, { - "epoch": 0.82, - "grad_norm": 0.9630978359237016, - "learning_rate": 8.595937597192422e-07, - "loss": 0.5444, + "epoch": 0.55, + "grad_norm": 1.6795486234694066, + "learning_rate": 4.42725112299517e-06, + "loss": 0.5059, "step": 7757 }, { - "epoch": 0.82, - "grad_norm": 4.040120236257973, - "learning_rate": 8.586385905148304e-07, - "loss": 0.5648, + "epoch": 0.55, + "grad_norm": 1.7857132576727037, + "learning_rate": 4.426109535545972e-06, + "loss": 0.4905, "step": 7758 }, { - "epoch": 0.82, - "grad_norm": 2.7334403291410077, - "learning_rate": 8.576839024418165e-07, - "loss": 0.6385, + "epoch": 0.55, + "grad_norm": 1.4770187521344542, + "learning_rate": 4.424967978411615e-06, + "loss": 0.5503, "step": 7759 }, { - "epoch": 0.82, - "grad_norm": 2.2473246156394096, - "learning_rate": 8.567296956111121e-07, - "loss": 0.5395, + "epoch": 0.55, + "grad_norm": 1.8805423314932037, + "learning_rate": 4.423826451652398e-06, + "loss": 0.5298, "step": 7760 }, { - "epoch": 0.82, - "grad_norm": 2.5453764969411057, - "learning_rate": 8.557759701335755e-07, - "loss": 0.6596, + "epoch": 0.55, + "grad_norm": 2.1024821805331517, + "learning_rate": 4.422684955328617e-06, + "loss": 0.5793, "step": 7761 }, { - "epoch": 0.82, - "grad_norm": 3.755171473082308, - "learning_rate": 8.54822726120006e-07, - "loss": 0.5935, + "epoch": 0.55, + "grad_norm": 1.895229857708383, + "learning_rate": 4.421543489500576e-06, + "loss": 0.5528, "step": 7762 }, { - "epoch": 0.82, - "grad_norm": 2.3621820404124114, - "learning_rate": 8.538699636811493e-07, - "loss": 0.6247, + "epoch": 0.55, + "grad_norm": 1.8558857100994492, + "learning_rate": 4.420402054228567e-06, + "loss": 0.6087, "step": 7763 }, { - "epoch": 0.82, - "grad_norm": 2.5573182046892384, - "learning_rate": 8.529176829276964e-07, - "loss": 0.5769, + "epoch": 0.55, + "grad_norm": 6.069955483937339, + "learning_rate": 4.419260649572883e-06, + "loss": 0.5354, "step": 7764 }, { - "epoch": 0.82, - "grad_norm": 2.452128633867755, - "learning_rate": 8.519658839702787e-07, - "loss": 0.5634, + "epoch": 0.55, + "grad_norm": 1.9683629409502692, + "learning_rate": 4.418119275593823e-06, + "loss": 0.5686, "step": 7765 }, { - "epoch": 0.82, - "grad_norm": 3.1439091682655964, - "learning_rate": 8.51014566919473e-07, - "loss": 0.6145, + "epoch": 0.55, + "grad_norm": 1.8786774145762464, + "learning_rate": 4.416977932351672e-06, + "loss": 0.5603, "step": 7766 }, { - "epoch": 0.82, - "grad_norm": 7.479175976072859, - "learning_rate": 8.500637318858018e-07, - "loss": 0.6538, + "epoch": 0.55, + "grad_norm": 1.77384946148783, + "learning_rate": 4.415836619906724e-06, + "loss": 0.602, "step": 7767 }, { - "epoch": 0.82, - "grad_norm": 2.430856695338035, - "learning_rate": 8.491133789797307e-07, - "loss": 0.6279, + "epoch": 0.55, + "grad_norm": 1.844282904169627, + "learning_rate": 4.414695338319263e-06, + "loss": 0.613, "step": 7768 }, { - "epoch": 0.82, - "grad_norm": 2.770347480171204, - "learning_rate": 8.481635083116668e-07, - "loss": 0.6057, + "epoch": 0.55, + "grad_norm": 1.9171261247811033, + "learning_rate": 4.41355408764958e-06, + "loss": 0.5833, "step": 7769 }, { - "epoch": 0.82, - "grad_norm": 3.069965846489253, - "learning_rate": 8.472141199919664e-07, - "loss": 0.5681, + "epoch": 0.55, + "grad_norm": 1.7451110984460656, + "learning_rate": 4.412412867957957e-06, + "loss": 0.5413, "step": 7770 }, { - "epoch": 0.82, - "grad_norm": 2.9992990174535383, - "learning_rate": 8.462652141309242e-07, - "loss": 0.6433, + "epoch": 0.55, + "grad_norm": 1.473266560572293, + "learning_rate": 4.411271679304675e-06, + "loss": 0.5089, "step": 7771 }, { - "epoch": 0.82, - "grad_norm": 2.79447885278482, - "learning_rate": 8.453167908387827e-07, - "loss": 0.6942, + "epoch": 0.55, + "grad_norm": 2.007570773228279, + "learning_rate": 4.410130521750019e-06, + "loss": 0.5103, "step": 7772 }, { - "epoch": 0.82, - "grad_norm": 2.3694158265158682, - "learning_rate": 8.443688502257253e-07, - "loss": 0.5695, + "epoch": 0.55, + "grad_norm": 1.8712021694148837, + "learning_rate": 4.4089893953542675e-06, + "loss": 0.5544, "step": 7773 }, { - "epoch": 0.82, - "grad_norm": 2.6521967702602134, - "learning_rate": 8.434213924018836e-07, - "loss": 0.6175, + "epoch": 0.55, + "grad_norm": 1.5133033506015046, + "learning_rate": 4.4078483001777e-06, + "loss": 0.5159, "step": 7774 }, { - "epoch": 0.82, - "grad_norm": 6.450142386712109, - "learning_rate": 8.424744174773281e-07, - "loss": 0.5917, + "epoch": 0.55, + "grad_norm": 0.7197047394891494, + "learning_rate": 4.406707236280592e-06, + "loss": 0.4108, "step": 7775 }, { - "epoch": 0.82, - "grad_norm": 2.60990893147203, - "learning_rate": 8.415279255620762e-07, - "loss": 0.7291, + "epoch": 0.55, + "grad_norm": 1.6294675883751288, + "learning_rate": 4.405566203723215e-06, + "loss": 0.4921, "step": 7776 }, { - "epoch": 0.82, - "grad_norm": 2.3162030741562702, - "learning_rate": 8.405819167660906e-07, - "loss": 0.5496, + "epoch": 0.55, + "grad_norm": 1.6664767756236918, + "learning_rate": 4.404425202565847e-06, + "loss": 0.5673, "step": 7777 }, { - "epoch": 0.82, - "grad_norm": 4.454400577973904, - "learning_rate": 8.396363911992739e-07, - "loss": 0.6948, + "epoch": 0.55, + "grad_norm": 2.388514057804789, + "learning_rate": 4.40328423286876e-06, + "loss": 0.539, "step": 7778 }, { - "epoch": 0.82, - "grad_norm": 2.833872698534407, - "learning_rate": 8.386913489714737e-07, - "loss": 0.5477, + "epoch": 0.55, + "grad_norm": 2.9914413452668716, + "learning_rate": 4.4021432946922185e-06, + "loss": 0.5772, "step": 7779 }, { - "epoch": 0.82, - "grad_norm": 2.445942443952145, - "learning_rate": 8.377467901924835e-07, - "loss": 0.5796, + "epoch": 0.55, + "grad_norm": 2.195722334000968, + "learning_rate": 4.4010023880964945e-06, + "loss": 0.5544, "step": 7780 }, { - "epoch": 0.82, - "grad_norm": 2.537918156805052, - "learning_rate": 8.368027149720404e-07, - "loss": 0.6614, + "epoch": 0.55, + "grad_norm": 0.730006847796988, + "learning_rate": 4.399861513141856e-06, + "loss": 0.4259, "step": 7781 }, { - "epoch": 0.82, - "grad_norm": 2.100483149802904, - "learning_rate": 8.358591234198221e-07, - "loss": 0.6177, + "epoch": 0.55, + "grad_norm": 1.6588769059618975, + "learning_rate": 4.398720669888565e-06, + "loss": 0.572, "step": 7782 }, { - "epoch": 0.82, - "grad_norm": 2.7198326768529277, - "learning_rate": 8.34916015645455e-07, - "loss": 0.625, + "epoch": 0.55, + "grad_norm": 0.8216655668975045, + "learning_rate": 4.397579858396884e-06, + "loss": 0.463, "step": 7783 }, { - "epoch": 0.82, - "grad_norm": 3.8763253745346042, - "learning_rate": 8.339733917585041e-07, - "loss": 0.504, + "epoch": 0.55, + "grad_norm": 1.7072117039453567, + "learning_rate": 4.396439078727078e-06, + "loss": 0.5346, "step": 7784 }, { - "epoch": 0.82, - "grad_norm": 4.0339142550402896, - "learning_rate": 8.330312518684813e-07, - "loss": 0.6544, + "epoch": 0.55, + "grad_norm": 1.9697290102440417, + "learning_rate": 4.395298330939405e-06, + "loss": 0.5368, "step": 7785 }, { - "epoch": 0.82, - "grad_norm": 3.372358625907128, - "learning_rate": 8.320895960848435e-07, - "loss": 0.662, + "epoch": 0.55, + "grad_norm": 2.1178385113403535, + "learning_rate": 4.394157615094123e-06, + "loss": 0.5385, "step": 7786 }, { - "epoch": 0.82, - "grad_norm": 11.12693531528549, - "learning_rate": 8.311484245169888e-07, - "loss": 0.5841, + "epoch": 0.55, + "grad_norm": 1.9039151595289425, + "learning_rate": 4.393016931251488e-06, + "loss": 0.6318, "step": 7787 }, { - "epoch": 0.82, - "grad_norm": 3.0258617844542615, - "learning_rate": 8.302077372742573e-07, - "loss": 0.5996, + "epoch": 0.55, + "grad_norm": 1.718421903359631, + "learning_rate": 4.391876279471755e-06, + "loss": 0.5053, "step": 7788 }, { - "epoch": 0.82, - "grad_norm": 2.149848595276636, - "learning_rate": 8.292675344659374e-07, - "loss": 0.5624, + "epoch": 0.55, + "grad_norm": 0.6728876867902522, + "learning_rate": 4.3907356598151794e-06, + "loss": 0.4178, "step": 7789 }, { - "epoch": 0.82, - "grad_norm": 9.921586434700963, - "learning_rate": 8.283278162012604e-07, - "loss": 0.6565, + "epoch": 0.55, + "grad_norm": 1.8231236125945454, + "learning_rate": 4.389595072342011e-06, + "loss": 0.5557, "step": 7790 }, { - "epoch": 0.82, - "grad_norm": 2.223681029796389, - "learning_rate": 8.273885825893984e-07, - "loss": 0.572, + "epoch": 0.55, + "grad_norm": 1.7420458927058629, + "learning_rate": 4.388454517112496e-06, + "loss": 0.5215, "step": 7791 }, { - "epoch": 0.82, - "grad_norm": 12.307302243450632, - "learning_rate": 8.264498337394683e-07, - "loss": 0.7158, + "epoch": 0.55, + "grad_norm": 1.8631518385484427, + "learning_rate": 4.387313994186888e-06, + "loss": 0.5711, "step": 7792 }, { - "epoch": 0.82, - "grad_norm": 3.185259212276455, - "learning_rate": 8.255115697605315e-07, - "loss": 0.6205, + "epoch": 0.55, + "grad_norm": 1.879147073204414, + "learning_rate": 4.386173503625432e-06, + "loss": 0.5617, "step": 7793 }, { - "epoch": 0.82, - "grad_norm": 2.3742771690682867, - "learning_rate": 8.245737907615948e-07, - "loss": 0.571, + "epoch": 0.55, + "grad_norm": 1.84861051716151, + "learning_rate": 4.3850330454883686e-06, + "loss": 0.4952, "step": 7794 }, { - "epoch": 0.82, - "grad_norm": 2.0306399507949875, - "learning_rate": 8.236364968516036e-07, - "loss": 0.574, + "epoch": 0.55, + "grad_norm": 1.7934780855399672, + "learning_rate": 4.383892619835944e-06, + "loss": 0.5179, "step": 7795 }, { - "epoch": 0.82, - "grad_norm": 2.95185086622032, - "learning_rate": 8.226996881394533e-07, - "loss": 0.5969, + "epoch": 0.55, + "grad_norm": 1.776057060050692, + "learning_rate": 4.3827522267284e-06, + "loss": 0.5742, "step": 7796 }, { - "epoch": 0.82, - "grad_norm": 2.83828313253317, - "learning_rate": 8.217633647339762e-07, - "loss": 0.6308, + "epoch": 0.55, + "grad_norm": 2.763400440336179, + "learning_rate": 4.381611866225976e-06, + "loss": 0.5537, "step": 7797 }, { - "epoch": 0.82, - "grad_norm": 2.740058288843735, - "learning_rate": 8.208275267439536e-07, - "loss": 0.6409, + "epoch": 0.55, + "grad_norm": 1.6807216223020902, + "learning_rate": 4.3804715383889056e-06, + "loss": 0.5037, "step": 7798 }, { - "epoch": 0.82, - "grad_norm": 3.205986939831031, - "learning_rate": 8.19892174278109e-07, - "loss": 0.638, + "epoch": 0.55, + "grad_norm": 1.8115420558704574, + "learning_rate": 4.379331243277429e-06, + "loss": 0.5453, "step": 7799 }, { - "epoch": 0.82, - "grad_norm": 3.133784418145159, - "learning_rate": 8.189573074451084e-07, - "loss": 0.5896, + "epoch": 0.55, + "grad_norm": 1.9751866537783156, + "learning_rate": 4.378190980951781e-06, + "loss": 0.5412, "step": 7800 }, { - "epoch": 0.82, - "grad_norm": 4.651287297043312, - "learning_rate": 8.180229263535605e-07, - "loss": 0.6102, + "epoch": 0.55, + "grad_norm": 1.7440279714047158, + "learning_rate": 4.37705075147219e-06, + "loss": 0.5438, "step": 7801 }, { - "epoch": 0.82, - "grad_norm": 2.8083107733624537, - "learning_rate": 8.1708903111202e-07, - "loss": 0.5911, + "epoch": 0.55, + "grad_norm": 1.638571472114985, + "learning_rate": 4.37591055489889e-06, + "loss": 0.4844, "step": 7802 }, { - "epoch": 0.82, - "grad_norm": 2.697503873505146, - "learning_rate": 8.161556218289857e-07, - "loss": 0.5624, + "epoch": 0.55, + "grad_norm": 1.5784936289648661, + "learning_rate": 4.374770391292109e-06, + "loss": 0.5324, "step": 7803 }, { - "epoch": 0.82, - "grad_norm": 2.3094841871089065, - "learning_rate": 8.15222698612897e-07, - "loss": 0.5943, + "epoch": 0.55, + "grad_norm": 1.9484976154087845, + "learning_rate": 4.373630260712076e-06, + "loss": 0.5702, "step": 7804 }, { - "epoch": 0.82, - "grad_norm": 2.791054703314953, - "learning_rate": 8.142902615721371e-07, - "loss": 0.5893, + "epoch": 0.55, + "grad_norm": 1.6275976624791946, + "learning_rate": 4.372490163219014e-06, + "loss": 0.5105, "step": 7805 }, { - "epoch": 0.82, - "grad_norm": 2.2449207941075198, - "learning_rate": 8.133583108150345e-07, - "loss": 0.5883, + "epoch": 0.55, + "grad_norm": 1.7507898090464409, + "learning_rate": 4.371350098873146e-06, + "loss": 0.5245, "step": 7806 }, { - "epoch": 0.82, - "grad_norm": 6.264533076641818, - "learning_rate": 8.124268464498625e-07, - "loss": 0.5802, + "epoch": 0.55, + "grad_norm": 0.7084981358941057, + "learning_rate": 4.370210067734699e-06, + "loss": 0.4516, "step": 7807 }, { - "epoch": 0.82, - "grad_norm": 2.397265245054088, - "learning_rate": 8.114958685848334e-07, - "loss": 0.7004, + "epoch": 0.55, + "grad_norm": 1.6497529147543804, + "learning_rate": 4.369070069863889e-06, + "loss": 0.5618, "step": 7808 }, { - "epoch": 0.82, - "grad_norm": 2.687783198325523, - "learning_rate": 8.105653773281074e-07, - "loss": 0.633, + "epoch": 0.55, + "grad_norm": 1.95357466004288, + "learning_rate": 4.367930105320936e-06, + "loss": 0.5119, "step": 7809 }, { - "epoch": 0.82, - "grad_norm": 4.572173865987845, - "learning_rate": 8.096353727877843e-07, - "loss": 0.6158, + "epoch": 0.55, + "grad_norm": 1.989629686565725, + "learning_rate": 4.366790174166055e-06, + "loss": 0.5912, "step": 7810 }, { - "epoch": 0.82, - "grad_norm": 2.084933298498584, - "learning_rate": 8.087058550719107e-07, - "loss": 0.6373, + "epoch": 0.55, + "grad_norm": 1.7211167585283818, + "learning_rate": 4.3656502764594646e-06, + "loss": 0.5332, "step": 7811 }, { - "epoch": 0.82, - "grad_norm": 4.117019095691375, - "learning_rate": 8.077768242884759e-07, - "loss": 0.5337, + "epoch": 0.55, + "grad_norm": 1.8431106689495764, + "learning_rate": 4.364510412261376e-06, + "loss": 0.5741, "step": 7812 }, { - "epoch": 0.82, - "grad_norm": 2.526067117482907, - "learning_rate": 8.068482805454115e-07, - "loss": 0.5935, + "epoch": 0.55, + "grad_norm": 0.6840502659819915, + "learning_rate": 4.363370581631999e-06, + "loss": 0.4175, "step": 7813 }, { - "epoch": 0.82, - "grad_norm": 2.770744913286671, - "learning_rate": 8.059202239505915e-07, - "loss": 0.6824, + "epoch": 0.55, + "grad_norm": 2.017976327659427, + "learning_rate": 4.362230784631547e-06, + "loss": 0.5395, "step": 7814 }, { - "epoch": 0.82, - "grad_norm": 2.3876150794603417, - "learning_rate": 8.049926546118359e-07, - "loss": 0.6197, + "epoch": 0.55, + "grad_norm": 1.5165424164803367, + "learning_rate": 4.361091021320226e-06, + "loss": 0.4728, "step": 7815 }, { - "epoch": 0.82, - "grad_norm": 2.301040153613851, - "learning_rate": 8.040655726369079e-07, - "loss": 0.6231, + "epoch": 0.55, + "grad_norm": 1.4773008720638456, + "learning_rate": 4.359951291758241e-06, + "loss": 0.555, "step": 7816 }, { - "epoch": 0.82, - "grad_norm": 2.1640120127578, - "learning_rate": 8.031389781335119e-07, - "loss": 0.5915, + "epoch": 0.55, + "grad_norm": 1.6323077521353688, + "learning_rate": 4.358811596005798e-06, + "loss": 0.6192, "step": 7817 }, { - "epoch": 0.82, - "grad_norm": 1.0216851076508051, - "learning_rate": 8.022128712092986e-07, - "loss": 0.5629, + "epoch": 0.55, + "grad_norm": 1.7302528178566496, + "learning_rate": 4.357671934123098e-06, + "loss": 0.6084, "step": 7818 }, { - "epoch": 0.82, - "grad_norm": 2.3407168223787758, - "learning_rate": 8.012872519718578e-07, - "loss": 0.5779, + "epoch": 0.55, + "grad_norm": 1.5936046359106553, + "learning_rate": 4.356532306170345e-06, + "loss": 0.5527, "step": 7819 }, { - "epoch": 0.82, - "grad_norm": 3.240628782070062, - "learning_rate": 8.003621205287271e-07, - "loss": 0.6247, + "epoch": 0.55, + "grad_norm": 2.2671784289550514, + "learning_rate": 4.355392712207735e-06, + "loss": 0.5223, "step": 7820 }, { - "epoch": 0.82, - "grad_norm": 2.5968226751951202, - "learning_rate": 7.994374769873864e-07, - "loss": 0.5631, + "epoch": 0.55, + "grad_norm": 1.4254670052092133, + "learning_rate": 4.354253152295466e-06, + "loss": 0.4774, "step": 7821 }, { - "epoch": 0.82, - "grad_norm": 2.9225493166351844, - "learning_rate": 7.98513321455257e-07, - "loss": 0.5619, + "epoch": 0.56, + "grad_norm": 1.7957302616474857, + "learning_rate": 4.353113626493734e-06, + "loss": 0.5295, "step": 7822 }, { - "epoch": 0.82, - "grad_norm": 2.5510204032518633, - "learning_rate": 7.975896540397038e-07, - "loss": 0.5647, + "epoch": 0.56, + "grad_norm": 1.5684951681519816, + "learning_rate": 4.351974134862731e-06, + "loss": 0.5719, "step": 7823 }, { - "epoch": 0.82, - "grad_norm": 3.2618088874821436, - "learning_rate": 7.966664748480362e-07, - "loss": 0.681, + "epoch": 0.56, + "grad_norm": 0.7705018806038914, + "learning_rate": 4.350834677462652e-06, + "loss": 0.3857, "step": 7824 }, { - "epoch": 0.82, - "grad_norm": 2.432627062148813, - "learning_rate": 7.957437839875088e-07, - "loss": 0.619, + "epoch": 0.56, + "grad_norm": 1.6763944921733034, + "learning_rate": 4.349695254353683e-06, + "loss": 0.5023, "step": 7825 }, { - "epoch": 0.82, - "grad_norm": 2.881899570004556, - "learning_rate": 7.948215815653149e-07, - "loss": 0.5353, + "epoch": 0.56, + "grad_norm": 3.2335198450429417, + "learning_rate": 4.348555865596015e-06, + "loss": 0.5707, "step": 7826 }, { - "epoch": 0.82, - "grad_norm": 2.211030969729491, - "learning_rate": 7.938998676885922e-07, - "loss": 0.5992, + "epoch": 0.56, + "grad_norm": 1.5283477817297213, + "learning_rate": 4.347416511249835e-06, + "loss": 0.5191, "step": 7827 }, { - "epoch": 0.82, - "grad_norm": 2.7189882982651374, - "learning_rate": 7.929786424644248e-07, - "loss": 0.5802, + "epoch": 0.56, + "grad_norm": 1.7194627146723842, + "learning_rate": 4.346277191375323e-06, + "loss": 0.5596, "step": 7828 }, { - "epoch": 0.82, - "grad_norm": 2.6023452660932165, - "learning_rate": 7.920579059998384e-07, - "loss": 0.575, + "epoch": 0.56, + "grad_norm": 2.1607157690734593, + "learning_rate": 4.345137906032668e-06, + "loss": 0.4495, "step": 7829 }, { - "epoch": 0.82, - "grad_norm": 2.417297217989513, - "learning_rate": 7.911376584017993e-07, - "loss": 0.5887, + "epoch": 0.56, + "grad_norm": 2.29182295438363, + "learning_rate": 4.343998655282047e-06, + "loss": 0.5135, "step": 7830 }, { - "epoch": 0.82, - "grad_norm": 2.636618821203334, - "learning_rate": 7.90217899777222e-07, - "loss": 0.5957, + "epoch": 0.56, + "grad_norm": 1.5223211769287408, + "learning_rate": 4.342859439183639e-06, + "loss": 0.5077, "step": 7831 }, { - "epoch": 0.82, - "grad_norm": 2.783745505915478, - "learning_rate": 7.89298630232958e-07, - "loss": 0.6066, + "epoch": 0.56, + "grad_norm": 1.7192749796162712, + "learning_rate": 4.341720257797621e-06, + "loss": 0.4346, "step": 7832 }, { - "epoch": 0.82, - "grad_norm": 2.211994261833092, - "learning_rate": 7.883798498758077e-07, - "loss": 0.5615, + "epoch": 0.56, + "grad_norm": 2.2826772103730417, + "learning_rate": 4.340581111184171e-06, + "loss": 0.5066, "step": 7833 }, { - "epoch": 0.82, - "grad_norm": 2.316992802552742, - "learning_rate": 7.874615588125128e-07, - "loss": 0.5865, + "epoch": 0.56, + "grad_norm": 1.6415873014866662, + "learning_rate": 4.339441999403463e-06, + "loss": 0.5159, "step": 7834 }, { - "epoch": 0.82, - "grad_norm": 2.540384973156056, - "learning_rate": 7.865437571497569e-07, - "loss": 0.5444, + "epoch": 0.56, + "grad_norm": 1.5038585188988054, + "learning_rate": 4.338302922515665e-06, + "loss": 0.5117, "step": 7835 }, { - "epoch": 0.82, - "grad_norm": 2.2642580877781904, - "learning_rate": 7.856264449941664e-07, - "loss": 0.5456, + "epoch": 0.56, + "grad_norm": 1.7216132531026234, + "learning_rate": 4.337163880580948e-06, + "loss": 0.5675, "step": 7836 }, { - "epoch": 0.82, - "grad_norm": 2.5525408933985214, - "learning_rate": 7.847096224523132e-07, - "loss": 0.607, + "epoch": 0.56, + "grad_norm": 1.8625720491645377, + "learning_rate": 4.3360248736594825e-06, + "loss": 0.4673, "step": 7837 }, { - "epoch": 0.82, - "grad_norm": 2.3245711806217613, - "learning_rate": 7.837932896307116e-07, - "loss": 0.5573, + "epoch": 0.56, + "grad_norm": 1.5799705660445293, + "learning_rate": 4.3348859018114315e-06, + "loss": 0.4858, "step": 7838 }, { - "epoch": 0.82, - "grad_norm": 3.05579417550974, - "learning_rate": 7.82877446635818e-07, - "loss": 0.5761, + "epoch": 0.56, + "grad_norm": 1.726511685390362, + "learning_rate": 4.333746965096962e-06, + "loss": 0.5636, "step": 7839 }, { - "epoch": 0.83, - "grad_norm": 2.5025364694067136, - "learning_rate": 7.819620935740313e-07, - "loss": 0.5728, + "epoch": 0.56, + "grad_norm": 1.7745429996191497, + "learning_rate": 4.332608063576234e-06, + "loss": 0.5607, "step": 7840 }, { - "epoch": 0.83, - "grad_norm": 2.541642302021767, - "learning_rate": 7.810472305516947e-07, - "loss": 0.6021, + "epoch": 0.56, + "grad_norm": 1.7141629717751148, + "learning_rate": 4.331469197309411e-06, + "loss": 0.6035, "step": 7841 }, { - "epoch": 0.83, - "grad_norm": 2.6527868881900747, - "learning_rate": 7.801328576750971e-07, - "loss": 0.5955, + "epoch": 0.56, + "grad_norm": 0.7360623857920902, + "learning_rate": 4.33033036635665e-06, + "loss": 0.4467, "step": 7842 }, { - "epoch": 0.83, - "grad_norm": 3.035882397492675, - "learning_rate": 7.792189750504642e-07, - "loss": 0.559, + "epoch": 0.56, + "grad_norm": 1.8722853889551492, + "learning_rate": 4.329191570778109e-06, + "loss": 0.5184, "step": 7843 }, { - "epoch": 0.83, - "grad_norm": 3.4225984060139196, - "learning_rate": 7.783055827839709e-07, - "loss": 0.594, + "epoch": 0.56, + "grad_norm": 1.7789853774744986, + "learning_rate": 4.32805281063394e-06, + "loss": 0.553, "step": 7844 }, { - "epoch": 0.83, - "grad_norm": 2.435724626217545, - "learning_rate": 7.7739268098173e-07, - "loss": 0.593, + "epoch": 0.56, + "grad_norm": 1.7319756416649699, + "learning_rate": 4.3269140859843e-06, + "loss": 0.5626, "step": 7845 }, { - "epoch": 0.83, - "grad_norm": 2.4904962113373412, - "learning_rate": 7.764802697498009e-07, - "loss": 0.615, + "epoch": 0.56, + "grad_norm": 1.5813526385949301, + "learning_rate": 4.325775396889338e-06, + "loss": 0.5181, "step": 7846 }, { - "epoch": 0.83, - "grad_norm": 2.464037403138296, - "learning_rate": 7.755683491941867e-07, - "loss": 0.7109, + "epoch": 0.56, + "grad_norm": 1.800074308528196, + "learning_rate": 4.3246367434092025e-06, + "loss": 0.5501, "step": 7847 }, { - "epoch": 0.83, - "grad_norm": 2.9748316199128824, - "learning_rate": 7.746569194208298e-07, - "loss": 0.6551, + "epoch": 0.56, + "grad_norm": 1.898057185864801, + "learning_rate": 4.3234981256040455e-06, + "loss": 0.5615, "step": 7848 }, { - "epoch": 0.83, - "grad_norm": 2.2358191510721923, - "learning_rate": 7.737459805356168e-07, - "loss": 0.5977, + "epoch": 0.56, + "grad_norm": 0.670489997161316, + "learning_rate": 4.322359543534009e-06, + "loss": 0.4228, "step": 7849 }, { - "epoch": 0.83, - "grad_norm": 2.419490109419338, - "learning_rate": 7.728355326443792e-07, - "loss": 0.5004, + "epoch": 0.56, + "grad_norm": 2.1542292162637353, + "learning_rate": 4.321220997259239e-06, + "loss": 0.5841, "step": 7850 }, { - "epoch": 0.83, - "grad_norm": 2.911759699027262, - "learning_rate": 7.719255758528904e-07, - "loss": 0.604, + "epoch": 0.56, + "grad_norm": 0.6851057746991106, + "learning_rate": 4.320082486839873e-06, + "loss": 0.4341, "step": 7851 }, { - "epoch": 0.83, - "grad_norm": 2.859440106403186, - "learning_rate": 7.710161102668667e-07, - "loss": 0.5881, + "epoch": 0.56, + "grad_norm": 1.5947123377717245, + "learning_rate": 4.3189440123360555e-06, + "loss": 0.5204, "step": 7852 }, { - "epoch": 0.83, - "grad_norm": 3.133615283383733, - "learning_rate": 7.701071359919654e-07, - "loss": 0.5945, + "epoch": 0.56, + "grad_norm": 2.449833099147218, + "learning_rate": 4.317805573807922e-06, + "loss": 0.4984, "step": 7853 }, { - "epoch": 0.83, - "grad_norm": 2.754614469699022, - "learning_rate": 7.691986531337891e-07, - "loss": 0.5677, + "epoch": 0.56, + "grad_norm": 2.0017016770278757, + "learning_rate": 4.316667171315611e-06, + "loss": 0.5367, "step": 7854 }, { - "epoch": 0.83, - "grad_norm": 2.7443708202298573, - "learning_rate": 7.682906617978836e-07, - "loss": 0.6404, + "epoch": 0.56, + "grad_norm": 2.2407312684829734, + "learning_rate": 4.315528804919254e-06, + "loss": 0.5199, "step": 7855 }, { - "epoch": 0.83, - "grad_norm": 3.087205618369263, - "learning_rate": 7.673831620897376e-07, - "loss": 0.6038, + "epoch": 0.56, + "grad_norm": 1.7422178030798168, + "learning_rate": 4.314390474678986e-06, + "loss": 0.5856, "step": 7856 }, { - "epoch": 0.83, - "grad_norm": 5.5096389980291365, - "learning_rate": 7.664761541147803e-07, - "loss": 0.6013, + "epoch": 0.56, + "grad_norm": 2.1312897948271763, + "learning_rate": 4.313252180654937e-06, + "loss": 0.4976, "step": 7857 }, { - "epoch": 0.83, - "grad_norm": 2.4275211367126226, - "learning_rate": 7.65569637978385e-07, - "loss": 0.6306, + "epoch": 0.56, + "grad_norm": 1.503900236580758, + "learning_rate": 4.312113922907235e-06, + "loss": 0.4753, "step": 7858 }, { - "epoch": 0.83, - "grad_norm": 3.0724412487922104, - "learning_rate": 7.646636137858682e-07, - "loss": 0.5641, + "epoch": 0.56, + "grad_norm": 1.5829677292329811, + "learning_rate": 4.310975701496004e-06, + "loss": 0.5471, "step": 7859 }, { - "epoch": 0.83, - "grad_norm": 2.6908765435507656, - "learning_rate": 7.637580816424906e-07, - "loss": 0.5556, + "epoch": 0.56, + "grad_norm": 1.7182784106414322, + "learning_rate": 4.309837516481372e-06, + "loss": 0.5062, "step": 7860 }, { - "epoch": 0.83, - "grad_norm": 2.2336816700244158, - "learning_rate": 7.628530416534536e-07, - "loss": 0.6658, + "epoch": 0.56, + "grad_norm": 1.7495510359338038, + "learning_rate": 4.308699367923462e-06, + "loss": 0.6341, "step": 7861 }, { - "epoch": 0.83, - "grad_norm": 5.534474436786471, - "learning_rate": 7.619484939239008e-07, - "loss": 0.5065, + "epoch": 0.56, + "grad_norm": 1.6048786578238332, + "learning_rate": 4.307561255882392e-06, + "loss": 0.5499, "step": 7862 }, { - "epoch": 0.83, - "grad_norm": 2.25517760727134, - "learning_rate": 7.610444385589206e-07, - "loss": 0.5669, + "epoch": 0.56, + "grad_norm": 1.5862666620284023, + "learning_rate": 4.306423180418285e-06, + "loss": 0.558, "step": 7863 }, { - "epoch": 0.83, - "grad_norm": 2.4664200875331077, - "learning_rate": 7.601408756635454e-07, - "loss": 0.5449, + "epoch": 0.56, + "grad_norm": 0.7297096470208068, + "learning_rate": 4.3052851415912556e-06, + "loss": 0.4233, "step": 7864 }, { - "epoch": 0.83, - "grad_norm": 2.3983166993158402, - "learning_rate": 7.592378053427463e-07, - "loss": 0.631, + "epoch": 0.56, + "grad_norm": 0.8147348307588197, + "learning_rate": 4.304147139461419e-06, + "loss": 0.4562, "step": 7865 }, { - "epoch": 0.83, - "grad_norm": 2.44965513205932, - "learning_rate": 7.583352277014405e-07, - "loss": 0.6007, + "epoch": 0.56, + "grad_norm": 1.9887050092621918, + "learning_rate": 4.303009174088886e-06, + "loss": 0.5422, "step": 7866 }, { - "epoch": 0.83, - "grad_norm": 2.783794296236303, - "learning_rate": 7.574331428444859e-07, - "loss": 0.6112, + "epoch": 0.56, + "grad_norm": 2.15571059569158, + "learning_rate": 4.301871245533772e-06, + "loss": 0.5847, "step": 7867 }, { - "epoch": 0.83, - "grad_norm": 7.858796558481055, - "learning_rate": 7.565315508766846e-07, - "loss": 0.6879, + "epoch": 0.56, + "grad_norm": 1.7251748094925652, + "learning_rate": 4.300733353856185e-06, + "loss": 0.5531, "step": 7868 }, { - "epoch": 0.83, - "grad_norm": 2.3392675349173246, - "learning_rate": 7.556304519027824e-07, - "loss": 0.535, + "epoch": 0.56, + "grad_norm": 1.8642798732561452, + "learning_rate": 4.299595499116231e-06, + "loss": 0.4715, "step": 7869 }, { - "epoch": 0.83, - "grad_norm": 2.345381147919508, - "learning_rate": 7.547298460274655e-07, - "loss": 0.5671, + "epoch": 0.56, + "grad_norm": 1.7568857492937195, + "learning_rate": 4.298457681374014e-06, + "loss": 0.5195, "step": 7870 }, { - "epoch": 0.83, - "grad_norm": 1.960829913288072, - "learning_rate": 7.538297333553613e-07, - "loss": 0.5243, + "epoch": 0.56, + "grad_norm": 1.8848322800106194, + "learning_rate": 4.2973199006896425e-06, + "loss": 0.5321, "step": 7871 }, { - "epoch": 0.83, - "grad_norm": 2.4386836992879974, - "learning_rate": 7.529301139910444e-07, - "loss": 0.5946, + "epoch": 0.56, + "grad_norm": 1.6183151548191805, + "learning_rate": 4.2961821571232145e-06, + "loss": 0.5381, "step": 7872 }, { - "epoch": 0.83, - "grad_norm": 2.1914556637515674, - "learning_rate": 7.520309880390314e-07, - "loss": 0.5832, + "epoch": 0.56, + "grad_norm": 1.5826147380663012, + "learning_rate": 4.2950444507348295e-06, + "loss": 0.5422, "step": 7873 }, { - "epoch": 0.83, - "grad_norm": 4.241007701198296, - "learning_rate": 7.51132355603778e-07, - "loss": 0.5416, + "epoch": 0.56, + "grad_norm": 1.6345221352652015, + "learning_rate": 4.293906781584584e-06, + "loss": 0.5882, "step": 7874 }, { - "epoch": 0.83, - "grad_norm": 2.0724515989906513, - "learning_rate": 7.502342167896847e-07, - "loss": 0.5964, + "epoch": 0.56, + "grad_norm": 1.709657323813796, + "learning_rate": 4.2927691497325755e-06, + "loss": 0.5887, "step": 7875 }, { - "epoch": 0.83, - "grad_norm": 2.5100584726157886, - "learning_rate": 7.493365717010947e-07, - "loss": 0.631, + "epoch": 0.56, + "grad_norm": 1.572199562853259, + "learning_rate": 4.291631555238898e-06, + "loss": 0.5192, "step": 7876 }, { - "epoch": 0.83, - "grad_norm": 2.7356056595469536, - "learning_rate": 7.484394204422962e-07, - "loss": 0.5424, + "epoch": 0.56, + "grad_norm": 2.0713121371920975, + "learning_rate": 4.290493998163639e-06, + "loss": 0.5504, "step": 7877 }, { - "epoch": 0.83, - "grad_norm": 2.0885569649811786, - "learning_rate": 7.475427631175141e-07, - "loss": 0.5181, + "epoch": 0.56, + "grad_norm": 2.0418677259373217, + "learning_rate": 4.289356478566894e-06, + "loss": 0.5403, "step": 7878 }, { - "epoch": 0.83, - "grad_norm": 2.304547742686197, - "learning_rate": 7.466465998309225e-07, - "loss": 0.6151, + "epoch": 0.56, + "grad_norm": 1.8187259021838331, + "learning_rate": 4.288218996508748e-06, + "loss": 0.5242, "step": 7879 }, { - "epoch": 0.83, - "grad_norm": 2.2979784331851913, - "learning_rate": 7.457509306866329e-07, - "loss": 0.569, + "epoch": 0.56, + "grad_norm": 1.9829637019030217, + "learning_rate": 4.287081552049286e-06, + "loss": 0.5139, "step": 7880 }, { - "epoch": 0.83, - "grad_norm": 3.0618039183537324, - "learning_rate": 7.448557557887021e-07, - "loss": 0.586, + "epoch": 0.56, + "grad_norm": 2.034555080093708, + "learning_rate": 4.285944145248591e-06, + "loss": 0.5698, "step": 7881 }, { - "epoch": 0.83, - "grad_norm": 3.5231994277364835, - "learning_rate": 7.439610752411303e-07, - "loss": 0.6385, + "epoch": 0.56, + "grad_norm": 1.5082810321087645, + "learning_rate": 4.284806776166747e-06, + "loss": 0.5278, "step": 7882 }, { - "epoch": 0.83, - "grad_norm": 2.5269036735363617, - "learning_rate": 7.430668891478576e-07, - "loss": 0.5901, + "epoch": 0.56, + "grad_norm": 1.8052116370629787, + "learning_rate": 4.283669444863832e-06, + "loss": 0.5902, "step": 7883 }, { - "epoch": 0.83, - "grad_norm": 2.3674099325580866, - "learning_rate": 7.421731976127672e-07, - "loss": 0.7775, + "epoch": 0.56, + "grad_norm": 2.0653800095688974, + "learning_rate": 4.282532151399924e-06, + "loss": 0.4874, "step": 7884 }, { - "epoch": 0.83, - "grad_norm": 2.671376901489743, - "learning_rate": 7.41280000739687e-07, - "loss": 0.5261, + "epoch": 0.56, + "grad_norm": 1.9350174734699683, + "learning_rate": 4.281394895835097e-06, + "loss": 0.5182, "step": 7885 }, { - "epoch": 0.83, - "grad_norm": 2.820411629914039, - "learning_rate": 7.403872986323862e-07, - "loss": 0.569, + "epoch": 0.56, + "grad_norm": 2.012087286743686, + "learning_rate": 4.28025767822943e-06, + "loss": 0.5556, "step": 7886 }, { - "epoch": 0.83, - "grad_norm": 2.577175781646432, - "learning_rate": 7.394950913945759e-07, - "loss": 0.5512, + "epoch": 0.56, + "grad_norm": 1.7079958813586946, + "learning_rate": 4.279120498642991e-06, + "loss": 0.5055, "step": 7887 }, { - "epoch": 0.83, - "grad_norm": 2.4642031464494174, - "learning_rate": 7.386033791299091e-07, - "loss": 0.6127, + "epoch": 0.56, + "grad_norm": 1.5495749273209722, + "learning_rate": 4.277983357135848e-06, + "loss": 0.4524, "step": 7888 }, { - "epoch": 0.83, - "grad_norm": 2.434672247809207, - "learning_rate": 7.37712161941983e-07, - "loss": 0.6554, + "epoch": 0.56, + "grad_norm": 1.8789911970380075, + "learning_rate": 4.276846253768071e-06, + "loss": 0.4541, "step": 7889 }, { - "epoch": 0.83, - "grad_norm": 2.5103511043710465, - "learning_rate": 7.368214399343371e-07, - "loss": 0.5331, + "epoch": 0.56, + "grad_norm": 1.9401203949716959, + "learning_rate": 4.275709188599725e-06, + "loss": 0.5731, "step": 7890 }, { - "epoch": 0.83, - "grad_norm": 2.7655999882645057, - "learning_rate": 7.35931213210454e-07, - "loss": 0.7108, + "epoch": 0.56, + "grad_norm": 1.6858162112640054, + "learning_rate": 4.2745721616908755e-06, + "loss": 0.5133, "step": 7891 }, { - "epoch": 0.83, - "grad_norm": 3.2183982961814324, - "learning_rate": 7.350414818737562e-07, - "loss": 0.5502, + "epoch": 0.56, + "grad_norm": 1.6664719477173153, + "learning_rate": 4.27343517310158e-06, + "loss": 0.4646, "step": 7892 }, { - "epoch": 0.83, - "grad_norm": 3.586664475078197, - "learning_rate": 7.34152246027609e-07, - "loss": 0.5924, + "epoch": 0.56, + "grad_norm": 1.4847764571137774, + "learning_rate": 4.272298222891903e-06, + "loss": 0.5165, "step": 7893 }, { - "epoch": 0.83, - "grad_norm": 2.366201926305009, - "learning_rate": 7.332635057753224e-07, - "loss": 0.6236, + "epoch": 0.56, + "grad_norm": 1.6908283360539595, + "learning_rate": 4.2711613111219e-06, + "loss": 0.507, "step": 7894 }, { - "epoch": 0.83, - "grad_norm": 2.3175961493215635, - "learning_rate": 7.323752612201491e-07, - "loss": 0.5758, + "epoch": 0.56, + "grad_norm": 1.8648621546752273, + "learning_rate": 4.270024437851625e-06, + "loss": 0.5458, "step": 7895 }, { - "epoch": 0.83, - "grad_norm": 16.41576198449689, - "learning_rate": 7.314875124652815e-07, - "loss": 0.6657, + "epoch": 0.56, + "grad_norm": 2.381721035780789, + "learning_rate": 4.268887603141132e-06, + "loss": 0.5277, "step": 7896 }, { - "epoch": 0.83, - "grad_norm": 3.516520516533135, - "learning_rate": 7.306002596138551e-07, - "loss": 0.6438, + "epoch": 0.56, + "grad_norm": 1.6291358947509669, + "learning_rate": 4.267750807050474e-06, + "loss": 0.4859, "step": 7897 }, { - "epoch": 0.83, - "grad_norm": 2.9227517959801372, - "learning_rate": 7.297135027689484e-07, - "loss": 0.5942, + "epoch": 0.56, + "grad_norm": 1.844710515487976, + "learning_rate": 4.266614049639699e-06, + "loss": 0.4802, "step": 7898 }, { - "epoch": 0.83, - "grad_norm": 2.6069484447619486, - "learning_rate": 7.288272420335841e-07, - "loss": 0.5786, + "epoch": 0.56, + "grad_norm": 1.6810307856729383, + "learning_rate": 4.265477330968856e-06, + "loss": 0.4925, "step": 7899 }, { - "epoch": 0.83, - "grad_norm": 2.5621275414127007, - "learning_rate": 7.279414775107241e-07, - "loss": 0.6239, + "epoch": 0.56, + "grad_norm": 0.7327929701757846, + "learning_rate": 4.2643406510979875e-06, + "loss": 0.4241, "step": 7900 }, { - "epoch": 0.83, - "grad_norm": 2.5478933874139176, - "learning_rate": 7.270562093032724e-07, - "loss": 0.6154, + "epoch": 0.56, + "grad_norm": 1.8650091570812555, + "learning_rate": 4.26320401008714e-06, + "loss": 0.5104, "step": 7901 }, { - "epoch": 0.83, - "grad_norm": 2.5146219884970686, - "learning_rate": 7.261714375140788e-07, - "loss": 0.6525, + "epoch": 0.56, + "grad_norm": 1.5659548275582515, + "learning_rate": 4.262067407996354e-06, + "loss": 0.5232, "step": 7902 }, { - "epoch": 0.83, - "grad_norm": 4.26095835769191, - "learning_rate": 7.252871622459335e-07, - "loss": 0.6109, + "epoch": 0.56, + "grad_norm": 1.9610255563873142, + "learning_rate": 4.260930844885667e-06, + "loss": 0.5726, "step": 7903 }, { - "epoch": 0.83, - "grad_norm": 2.7915685895483926, - "learning_rate": 7.244033836015696e-07, - "loss": 0.7238, + "epoch": 0.56, + "grad_norm": 1.836026513931319, + "learning_rate": 4.259794320815116e-06, + "loss": 0.5546, "step": 7904 }, { - "epoch": 0.83, - "grad_norm": 2.44562542266984, - "learning_rate": 7.235201016836613e-07, - "loss": 0.5962, + "epoch": 0.56, + "grad_norm": 1.7088365120442235, + "learning_rate": 4.258657835844738e-06, + "loss": 0.602, "step": 7905 }, { - "epoch": 0.83, - "grad_norm": 4.6869459471637755, - "learning_rate": 7.226373165948241e-07, - "loss": 0.5836, + "epoch": 0.56, + "grad_norm": 1.5490907655980246, + "learning_rate": 4.257521390034566e-06, + "loss": 0.5131, "step": 7906 }, { - "epoch": 0.83, - "grad_norm": 4.282445234624132, - "learning_rate": 7.21755028437619e-07, - "loss": 0.6254, + "epoch": 0.56, + "grad_norm": 1.6868213567133836, + "learning_rate": 4.25638498344463e-06, + "loss": 0.5433, "step": 7907 }, { - "epoch": 0.83, - "grad_norm": 4.521493455628609, - "learning_rate": 7.208732373145483e-07, - "loss": 0.6589, + "epoch": 0.56, + "grad_norm": 1.7632771925777602, + "learning_rate": 4.2552486161349574e-06, + "loss": 0.4895, "step": 7908 }, { - "epoch": 0.83, - "grad_norm": 2.232695529398515, - "learning_rate": 7.199919433280555e-07, - "loss": 0.6094, + "epoch": 0.56, + "grad_norm": 2.4268625220883977, + "learning_rate": 4.254112288165577e-06, + "loss": 0.5024, "step": 7909 }, { - "epoch": 0.83, - "grad_norm": 3.962644679645622, - "learning_rate": 7.191111465805256e-07, - "loss": 0.5981, + "epoch": 0.56, + "grad_norm": 1.6081552856642392, + "learning_rate": 4.252975999596515e-06, + "loss": 0.5287, "step": 7910 }, { - "epoch": 0.83, - "grad_norm": 2.4471859782636165, - "learning_rate": 7.182308471742877e-07, - "loss": 0.5816, + "epoch": 0.56, + "grad_norm": 1.718699150329832, + "learning_rate": 4.2518397504877886e-06, + "loss": 0.5448, "step": 7911 }, { - "epoch": 0.83, - "grad_norm": 2.702379481712408, - "learning_rate": 7.173510452116139e-07, - "loss": 0.68, + "epoch": 0.56, + "grad_norm": 0.7931079933879331, + "learning_rate": 4.250703540899422e-06, + "loss": 0.4571, "step": 7912 }, { - "epoch": 0.83, - "grad_norm": 1.0632897742865417, - "learning_rate": 7.164717407947142e-07, - "loss": 0.5708, + "epoch": 0.56, + "grad_norm": 1.7880836426803675, + "learning_rate": 4.249567370891436e-06, + "loss": 0.5828, "step": 7913 }, { - "epoch": 0.83, - "grad_norm": 2.423696780817971, - "learning_rate": 7.155929340257467e-07, - "loss": 0.5654, + "epoch": 0.56, + "grad_norm": 2.5375097964571953, + "learning_rate": 4.248431240523844e-06, + "loss": 0.4731, "step": 7914 }, { - "epoch": 0.83, - "grad_norm": 2.0545652099506952, - "learning_rate": 7.14714625006806e-07, - "loss": 0.549, + "epoch": 0.56, + "grad_norm": 1.9239466228041664, + "learning_rate": 4.247295149856657e-06, + "loss": 0.5909, "step": 7915 }, { - "epoch": 0.83, - "grad_norm": 4.240068457011979, - "learning_rate": 7.138368138399327e-07, - "loss": 0.575, + "epoch": 0.56, + "grad_norm": 0.7507895330693232, + "learning_rate": 4.246159098949894e-06, + "loss": 0.4261, "step": 7916 }, { - "epoch": 0.83, - "grad_norm": 2.354272324791377, - "learning_rate": 7.129595006271095e-07, - "loss": 0.6664, + "epoch": 0.56, + "grad_norm": 2.0079859976777383, + "learning_rate": 4.2450230878635615e-06, + "loss": 0.5267, "step": 7917 }, { - "epoch": 0.83, - "grad_norm": 3.1167965022837523, - "learning_rate": 7.120826854702589e-07, - "loss": 0.5909, + "epoch": 0.56, + "grad_norm": 1.8712595030219465, + "learning_rate": 4.2438871166576664e-06, + "loss": 0.5821, "step": 7918 }, { - "epoch": 0.83, - "grad_norm": 2.8860947312539493, - "learning_rate": 7.112063684712456e-07, - "loss": 0.5764, + "epoch": 0.56, + "grad_norm": 0.776474569363202, + "learning_rate": 4.242751185392217e-06, + "loss": 0.4327, "step": 7919 }, { - "epoch": 0.83, - "grad_norm": 2.5639149644592307, - "learning_rate": 7.103305497318786e-07, - "loss": 0.5729, + "epoch": 0.56, + "grad_norm": 2.813042830125963, + "learning_rate": 4.241615294127215e-06, + "loss": 0.5088, "step": 7920 }, { - "epoch": 0.83, - "grad_norm": 4.157567164040736, - "learning_rate": 7.094552293539098e-07, - "loss": 0.6691, + "epoch": 0.56, + "grad_norm": 3.3139998201569933, + "learning_rate": 4.240479442922664e-06, + "loss": 0.5034, "step": 7921 }, { - "epoch": 0.83, - "grad_norm": 3.017279987323207, - "learning_rate": 7.08580407439029e-07, - "loss": 0.7188, + "epoch": 0.56, + "grad_norm": 1.5420114540584131, + "learning_rate": 4.239343631838562e-06, + "loss": 0.4644, "step": 7922 }, { - "epoch": 0.83, - "grad_norm": 3.180206068529602, - "learning_rate": 7.077060840888705e-07, - "loss": 0.5835, + "epoch": 0.56, + "grad_norm": 2.1138470591377088, + "learning_rate": 4.2382078609349055e-06, + "loss": 0.5675, "step": 7923 }, { - "epoch": 0.83, - "grad_norm": 3.2183678021512128, - "learning_rate": 7.068322594050114e-07, - "loss": 0.5853, + "epoch": 0.56, + "grad_norm": 1.8387897317479684, + "learning_rate": 4.237072130271693e-06, + "loss": 0.5485, "step": 7924 }, { - "epoch": 0.83, - "grad_norm": 2.730991122722091, - "learning_rate": 7.059589334889705e-07, - "loss": 0.5394, + "epoch": 0.56, + "grad_norm": 0.7804033673626266, + "learning_rate": 4.235936439908916e-06, + "loss": 0.4341, "step": 7925 }, { - "epoch": 0.83, - "grad_norm": 2.7372841955338085, - "learning_rate": 7.050861064422087e-07, - "loss": 0.6588, + "epoch": 0.56, + "grad_norm": 1.9451340294345458, + "learning_rate": 4.234800789906562e-06, + "loss": 0.4799, "step": 7926 }, { - "epoch": 0.83, - "grad_norm": 2.8242014928415333, - "learning_rate": 7.042137783661273e-07, - "loss": 0.6495, + "epoch": 0.56, + "grad_norm": 2.1622604872659377, + "learning_rate": 4.2336651803246255e-06, + "loss": 0.5296, "step": 7927 }, { - "epoch": 0.83, - "grad_norm": 3.3717042988457577, - "learning_rate": 7.033419493620708e-07, - "loss": 0.6167, + "epoch": 0.56, + "grad_norm": 0.7152613330917906, + "learning_rate": 4.232529611223091e-06, + "loss": 0.4302, "step": 7928 }, { - "epoch": 0.83, - "grad_norm": 0.9434034854373674, - "learning_rate": 7.024706195313258e-07, - "loss": 0.5459, + "epoch": 0.56, + "grad_norm": 2.5897003932096645, + "learning_rate": 4.231394082661944e-06, + "loss": 0.544, "step": 7929 }, { - "epoch": 0.83, - "grad_norm": 0.948265264022775, - "learning_rate": 7.015997889751225e-07, - "loss": 0.5389, + "epoch": 0.56, + "grad_norm": 0.733995343658287, + "learning_rate": 4.230258594701163e-06, + "loss": 0.4417, "step": 7930 }, { - "epoch": 0.83, - "grad_norm": 2.464153456017488, - "learning_rate": 7.007294577946306e-07, - "loss": 0.5651, + "epoch": 0.56, + "grad_norm": 0.7596755239488437, + "learning_rate": 4.229123147400733e-06, + "loss": 0.464, "step": 7931 }, { - "epoch": 0.83, - "grad_norm": 3.0196234432409446, - "learning_rate": 6.998596260909607e-07, - "loss": 0.6672, + "epoch": 0.56, + "grad_norm": 1.9273885459274698, + "learning_rate": 4.227987740820632e-06, + "loss": 0.5636, "step": 7932 }, { - "epoch": 0.83, - "grad_norm": 2.2187036742506003, - "learning_rate": 6.989902939651694e-07, - "loss": 0.6165, + "epoch": 0.56, + "grad_norm": 2.44603570176126, + "learning_rate": 4.226852375020832e-06, + "loss": 0.5237, "step": 7933 }, { - "epoch": 0.83, - "grad_norm": 2.4128874884249774, - "learning_rate": 6.981214615182541e-07, - "loss": 0.5248, + "epoch": 0.56, + "grad_norm": 1.9176826456844964, + "learning_rate": 4.22571705006131e-06, + "loss": 0.4632, "step": 7934 }, { - "epoch": 0.83, - "grad_norm": 2.714711543409674, - "learning_rate": 6.972531288511514e-07, - "loss": 0.6234, + "epoch": 0.56, + "grad_norm": 1.6687059668255804, + "learning_rate": 4.224581766002037e-06, + "loss": 0.5287, "step": 7935 }, { - "epoch": 0.84, - "grad_norm": 2.566568280987358, - "learning_rate": 6.963852960647416e-07, - "loss": 0.6142, + "epoch": 0.56, + "grad_norm": 2.156555576850282, + "learning_rate": 4.223446522902983e-06, + "loss": 0.5378, "step": 7936 }, { - "epoch": 0.84, - "grad_norm": 2.5912361052965105, - "learning_rate": 6.955179632598475e-07, - "loss": 0.6019, + "epoch": 0.56, + "grad_norm": 1.537319769974889, + "learning_rate": 4.222311320824115e-06, + "loss": 0.4414, "step": 7937 }, { - "epoch": 0.84, - "grad_norm": 3.0046865938581657, - "learning_rate": 6.946511305372327e-07, - "loss": 0.6551, + "epoch": 0.56, + "grad_norm": 1.7643441773039121, + "learning_rate": 4.221176159825397e-06, + "loss": 0.5129, "step": 7938 }, { - "epoch": 0.84, - "grad_norm": 2.62750432152481, - "learning_rate": 6.937847979976059e-07, - "loss": 0.7307, + "epoch": 0.56, + "grad_norm": 1.8243142275927837, + "learning_rate": 4.220041039966795e-06, + "loss": 0.4683, "step": 7939 }, { - "epoch": 0.84, - "grad_norm": 4.463425594035302, - "learning_rate": 6.929189657416136e-07, - "loss": 0.6263, + "epoch": 0.56, + "grad_norm": 1.7793540689110432, + "learning_rate": 4.218905961308267e-06, + "loss": 0.5224, "step": 7940 }, { - "epoch": 0.84, - "grad_norm": 2.293961976197948, - "learning_rate": 6.920536338698436e-07, - "loss": 0.7305, + "epoch": 0.56, + "grad_norm": 1.7560304497029875, + "learning_rate": 4.217770923909773e-06, + "loss": 0.5196, "step": 7941 }, { - "epoch": 0.84, - "grad_norm": 3.245455408558447, - "learning_rate": 6.911888024828295e-07, - "loss": 0.6554, + "epoch": 0.56, + "grad_norm": 4.186740782278691, + "learning_rate": 4.216635927831269e-06, + "loss": 0.5998, "step": 7942 }, { - "epoch": 0.84, - "grad_norm": 2.0647694096526474, - "learning_rate": 6.903244716810459e-07, - "loss": 0.5978, + "epoch": 0.56, + "grad_norm": 1.6752875696067164, + "learning_rate": 4.21550097313271e-06, + "loss": 0.5755, "step": 7943 }, { - "epoch": 0.84, - "grad_norm": 2.2770572793711135, - "learning_rate": 6.894606415649074e-07, - "loss": 0.6284, + "epoch": 0.56, + "grad_norm": 3.1482501821445728, + "learning_rate": 4.2143660598740495e-06, + "loss": 0.5075, "step": 7944 }, { - "epoch": 0.84, - "grad_norm": 2.590717718772522, - "learning_rate": 6.8859731223477e-07, - "loss": 0.6585, + "epoch": 0.56, + "grad_norm": 1.8028988200359461, + "learning_rate": 4.213231188115233e-06, + "loss": 0.4926, "step": 7945 }, { - "epoch": 0.84, - "grad_norm": 2.1415058085079095, - "learning_rate": 6.877344837909334e-07, - "loss": 0.6856, + "epoch": 0.56, + "grad_norm": 1.9928190440472924, + "learning_rate": 4.212096357916214e-06, + "loss": 0.4864, "step": 7946 }, { - "epoch": 0.84, - "grad_norm": 2.5987535704327875, - "learning_rate": 6.868721563336406e-07, - "loss": 0.6328, + "epoch": 0.56, + "grad_norm": 0.7393294256279669, + "learning_rate": 4.2109615693369334e-06, + "loss": 0.4417, "step": 7947 }, { - "epoch": 0.84, - "grad_norm": 2.5233039406914495, - "learning_rate": 6.860103299630722e-07, - "loss": 0.6124, + "epoch": 0.56, + "grad_norm": 2.0996513973852684, + "learning_rate": 4.209826822437336e-06, + "loss": 0.4831, "step": 7948 }, { - "epoch": 0.84, - "grad_norm": 2.524754669248327, - "learning_rate": 6.851490047793524e-07, - "loss": 0.6072, + "epoch": 0.56, + "grad_norm": 1.8397284368099365, + "learning_rate": 4.208692117277364e-06, + "loss": 0.5379, "step": 7949 }, { - "epoch": 0.84, - "grad_norm": 2.540427405947607, - "learning_rate": 6.84288180882548e-07, - "loss": 0.6113, + "epoch": 0.56, + "grad_norm": 1.7118306845783688, + "learning_rate": 4.207557453916955e-06, + "loss": 0.5019, "step": 7950 }, { - "epoch": 0.84, - "grad_norm": 0.9908048969702398, - "learning_rate": 6.834278583726677e-07, - "loss": 0.532, + "epoch": 0.56, + "grad_norm": 1.7469222819825267, + "learning_rate": 4.206422832416047e-06, + "loss": 0.524, "step": 7951 }, { - "epoch": 0.84, - "grad_norm": 3.2038346124338073, - "learning_rate": 6.825680373496618e-07, - "loss": 0.5439, + "epoch": 0.56, + "grad_norm": 1.723656114814808, + "learning_rate": 4.205288252834575e-06, + "loss": 0.509, "step": 7952 }, { - "epoch": 0.84, - "grad_norm": 0.9377925721646436, - "learning_rate": 6.817087179134208e-07, - "loss": 0.5117, + "epoch": 0.56, + "grad_norm": 0.7137311439987881, + "learning_rate": 4.204153715232468e-06, + "loss": 0.4604, "step": 7953 }, { - "epoch": 0.84, - "grad_norm": 3.407003844113903, - "learning_rate": 6.80849900163777e-07, - "loss": 0.5797, + "epoch": 0.56, + "grad_norm": 1.555600133726678, + "learning_rate": 4.203019219669661e-06, + "loss": 0.4846, "step": 7954 }, { - "epoch": 0.84, - "grad_norm": 2.2839557052539923, - "learning_rate": 6.799915842005062e-07, - "loss": 0.5703, + "epoch": 0.56, + "grad_norm": 3.2887793864405723, + "learning_rate": 4.201884766206077e-06, + "loss": 0.5567, "step": 7955 }, { - "epoch": 0.84, - "grad_norm": 2.3947698210437935, - "learning_rate": 6.791337701233269e-07, - "loss": 0.5937, + "epoch": 0.56, + "grad_norm": 1.7969233020844753, + "learning_rate": 4.200750354901646e-06, + "loss": 0.4847, "step": 7956 }, { - "epoch": 0.84, - "grad_norm": 2.6739236325422904, - "learning_rate": 6.782764580318951e-07, - "loss": 0.7199, + "epoch": 0.56, + "grad_norm": 1.470034068912422, + "learning_rate": 4.199615985816287e-06, + "loss": 0.574, "step": 7957 }, { - "epoch": 0.84, - "grad_norm": 2.1116863967686004, - "learning_rate": 6.774196480258111e-07, - "loss": 0.6494, + "epoch": 0.56, + "grad_norm": 1.9058082149355742, + "learning_rate": 4.1984816590099266e-06, + "loss": 0.5176, "step": 7958 }, { - "epoch": 0.84, - "grad_norm": 2.1683016397788997, - "learning_rate": 6.765633402046168e-07, - "loss": 0.5955, + "epoch": 0.56, + "grad_norm": 1.5804943393077662, + "learning_rate": 4.1973473745424794e-06, + "loss": 0.5054, "step": 7959 }, { - "epoch": 0.84, - "grad_norm": 3.032305057994704, - "learning_rate": 6.757075346677961e-07, - "loss": 0.603, + "epoch": 0.56, + "grad_norm": 1.708471450295443, + "learning_rate": 4.196213132473863e-06, + "loss": 0.5219, "step": 7960 }, { - "epoch": 0.84, - "grad_norm": 2.2604613175721164, - "learning_rate": 6.748522315147744e-07, - "loss": 0.6187, + "epoch": 0.56, + "grad_norm": 1.8273864161510986, + "learning_rate": 4.195078932863994e-06, + "loss": 0.6238, "step": 7961 }, { - "epoch": 0.84, - "grad_norm": 4.969636492342722, - "learning_rate": 6.739974308449176e-07, - "loss": 0.6081, + "epoch": 0.57, + "grad_norm": 2.286954896990274, + "learning_rate": 4.193944775772784e-06, + "loss": 0.5754, "step": 7962 }, { - "epoch": 0.84, - "grad_norm": 3.0339603599056373, - "learning_rate": 6.731431327575339e-07, - "loss": 0.6901, + "epoch": 0.57, + "grad_norm": 1.4548170237210307, + "learning_rate": 4.19281066126014e-06, + "loss": 0.5, "step": 7963 }, { - "epoch": 0.84, - "grad_norm": 3.2703468846615533, - "learning_rate": 6.722893373518724e-07, - "loss": 0.5622, + "epoch": 0.57, + "grad_norm": 3.122786965417151, + "learning_rate": 4.1916765893859714e-06, + "loss": 0.5815, "step": 7964 }, { - "epoch": 0.84, - "grad_norm": 2.4646080011429485, - "learning_rate": 6.714360447271273e-07, - "loss": 0.522, + "epoch": 0.57, + "grad_norm": 1.776458668293524, + "learning_rate": 4.190542560210186e-06, + "loss": 0.5829, "step": 7965 }, { - "epoch": 0.84, - "grad_norm": 2.364242035976487, - "learning_rate": 6.705832549824293e-07, - "loss": 0.6273, + "epoch": 0.57, + "grad_norm": 2.094441016793542, + "learning_rate": 4.189408573792686e-06, + "loss": 0.5427, "step": 7966 }, { - "epoch": 0.84, - "grad_norm": 2.7352098181866817, - "learning_rate": 6.69730968216853e-07, - "loss": 0.6764, + "epoch": 0.57, + "grad_norm": 1.8383510419596714, + "learning_rate": 4.1882746301933716e-06, + "loss": 0.5685, "step": 7967 }, { - "epoch": 0.84, - "grad_norm": 4.616311936586499, - "learning_rate": 6.688791845294151e-07, - "loss": 0.6525, + "epoch": 0.57, + "grad_norm": 1.637614878405265, + "learning_rate": 4.18714072947214e-06, + "loss": 0.6017, "step": 7968 }, { - "epoch": 0.84, - "grad_norm": 2.4255233825081826, - "learning_rate": 6.680279040190745e-07, - "loss": 0.6588, + "epoch": 0.57, + "grad_norm": 2.0490487669548823, + "learning_rate": 4.186006871688891e-06, + "loss": 0.5403, "step": 7969 }, { - "epoch": 0.84, - "grad_norm": 2.5378364716116764, - "learning_rate": 6.671771267847299e-07, - "loss": 0.5982, + "epoch": 0.57, + "grad_norm": 2.0274930705338816, + "learning_rate": 4.1848730569035165e-06, + "loss": 0.6037, "step": 7970 }, { - "epoch": 0.84, - "grad_norm": 2.323293588729379, - "learning_rate": 6.663268529252209e-07, - "loss": 0.6346, + "epoch": 0.57, + "grad_norm": 1.8784803379050128, + "learning_rate": 4.1837392851759105e-06, + "loss": 0.5784, "step": 7971 }, { - "epoch": 0.84, - "grad_norm": 2.164418560400295, - "learning_rate": 6.654770825393303e-07, - "loss": 0.6584, + "epoch": 0.57, + "grad_norm": 1.6962147571285617, + "learning_rate": 4.1826055565659584e-06, + "loss": 0.48, "step": 7972 }, { - "epoch": 0.84, - "grad_norm": 2.9789326506768754, - "learning_rate": 6.646278157257824e-07, - "loss": 0.5408, + "epoch": 0.57, + "grad_norm": 3.2980489937619515, + "learning_rate": 4.181471871133553e-06, + "loss": 0.5322, "step": 7973 }, { - "epoch": 0.84, - "grad_norm": 2.4473343292404257, - "learning_rate": 6.637790525832438e-07, - "loss": 0.6776, + "epoch": 0.57, + "grad_norm": 1.8811892886830837, + "learning_rate": 4.180338228938577e-06, + "loss": 0.5258, "step": 7974 }, { - "epoch": 0.84, - "grad_norm": 4.333751171962327, - "learning_rate": 6.629307932103201e-07, - "loss": 0.6228, + "epoch": 0.57, + "grad_norm": 1.7426850107668241, + "learning_rate": 4.179204630040911e-06, + "loss": 0.5583, "step": 7975 }, { - "epoch": 0.84, - "grad_norm": 4.194575988760013, - "learning_rate": 6.620830377055587e-07, - "loss": 0.5451, + "epoch": 0.57, + "grad_norm": 1.6743121050914795, + "learning_rate": 4.1780710745004395e-06, + "loss": 0.5772, "step": 7976 }, { - "epoch": 0.84, - "grad_norm": 2.1276202217427196, - "learning_rate": 6.612357861674501e-07, - "loss": 0.5791, + "epoch": 0.57, + "grad_norm": 1.850553457561551, + "learning_rate": 4.176937562377039e-06, + "loss": 0.5179, "step": 7977 }, { - "epoch": 0.84, - "grad_norm": 2.5647125903291776, - "learning_rate": 6.603890386944273e-07, - "loss": 0.5679, + "epoch": 0.57, + "grad_norm": 1.9005551691812823, + "learning_rate": 4.175804093730585e-06, + "loss": 0.6253, "step": 7978 }, { - "epoch": 0.84, - "grad_norm": 2.3644218883143338, - "learning_rate": 6.59542795384861e-07, - "loss": 0.5646, + "epoch": 0.57, + "grad_norm": 2.56592342327115, + "learning_rate": 4.17467066862095e-06, + "loss": 0.5343, "step": 7979 }, { - "epoch": 0.84, - "grad_norm": 3.3454428465301946, - "learning_rate": 6.586970563370649e-07, - "loss": 0.6593, + "epoch": 0.57, + "grad_norm": 1.818969020688428, + "learning_rate": 4.173537287108009e-06, + "loss": 0.5486, "step": 7980 }, { - "epoch": 0.84, - "grad_norm": 3.2957829519224, - "learning_rate": 6.578518216492951e-07, - "loss": 0.6074, + "epoch": 0.57, + "grad_norm": 1.9088400935929546, + "learning_rate": 4.172403949251628e-06, + "loss": 0.5128, "step": 7981 }, { - "epoch": 0.84, - "grad_norm": 2.989238602602019, - "learning_rate": 6.570070914197496e-07, - "loss": 0.6612, + "epoch": 0.57, + "grad_norm": 2.535497094078783, + "learning_rate": 4.171270655111676e-06, + "loss": 0.4881, "step": 7982 }, { - "epoch": 0.84, - "grad_norm": 2.6361300624581046, - "learning_rate": 6.561628657465663e-07, - "loss": 0.6422, + "epoch": 0.57, + "grad_norm": 1.975489106894407, + "learning_rate": 4.170137404748014e-06, + "loss": 0.5666, "step": 7983 }, { - "epoch": 0.84, - "grad_norm": 2.3784169240024085, - "learning_rate": 6.553191447278234e-07, - "loss": 0.6598, + "epoch": 0.57, + "grad_norm": 1.9279343615891371, + "learning_rate": 4.169004198220508e-06, + "loss": 0.5747, "step": 7984 }, { - "epoch": 0.84, - "grad_norm": 4.116665012500234, - "learning_rate": 6.544759284615431e-07, - "loss": 0.5628, + "epoch": 0.57, + "grad_norm": 1.9345573574549626, + "learning_rate": 4.1678710355890144e-06, + "loss": 0.5049, "step": 7985 }, { - "epoch": 0.84, - "grad_norm": 4.866416810480354, - "learning_rate": 6.536332170456877e-07, - "loss": 0.5985, + "epoch": 0.57, + "grad_norm": 2.04644699968564, + "learning_rate": 4.166737916913394e-06, + "loss": 0.5025, "step": 7986 }, { - "epoch": 0.84, - "grad_norm": 2.122808635192976, - "learning_rate": 6.527910105781626e-07, - "loss": 0.5883, + "epoch": 0.57, + "grad_norm": 1.7086665061868136, + "learning_rate": 4.1656048422534974e-06, + "loss": 0.5446, "step": 7987 }, { - "epoch": 0.84, - "grad_norm": 2.9397977174918566, - "learning_rate": 6.519493091568108e-07, - "loss": 0.6253, + "epoch": 0.57, + "grad_norm": 1.514371385976562, + "learning_rate": 4.164471811669184e-06, + "loss": 0.4818, "step": 7988 }, { - "epoch": 0.84, - "grad_norm": 5.2026307795457765, - "learning_rate": 6.511081128794183e-07, - "loss": 0.6072, + "epoch": 0.57, + "grad_norm": 1.8564785920317513, + "learning_rate": 4.163338825220299e-06, + "loss": 0.5236, "step": 7989 }, { - "epoch": 0.84, - "grad_norm": 2.391514496115847, - "learning_rate": 6.502674218437144e-07, - "loss": 0.5603, + "epoch": 0.57, + "grad_norm": 1.6143895565314013, + "learning_rate": 4.162205882966691e-06, + "loss": 0.5093, "step": 7990 }, { - "epoch": 0.84, - "grad_norm": 3.156056886121653, - "learning_rate": 6.494272361473681e-07, - "loss": 0.6524, + "epoch": 0.57, + "grad_norm": 1.6227306990244474, + "learning_rate": 4.16107298496821e-06, + "loss": 0.5946, "step": 7991 }, { - "epoch": 0.84, - "grad_norm": 3.732431840720779, - "learning_rate": 6.485875558879895e-07, - "loss": 0.6481, + "epoch": 0.57, + "grad_norm": 0.7005184064585938, + "learning_rate": 4.159940131284694e-06, + "loss": 0.4474, "step": 7992 }, { - "epoch": 0.84, - "grad_norm": 1.9873437823454094, - "learning_rate": 6.477483811631291e-07, - "loss": 0.5987, + "epoch": 0.57, + "grad_norm": 1.8705391471378023, + "learning_rate": 4.158807321975989e-06, + "loss": 0.5109, "step": 7993 }, { - "epoch": 0.84, - "grad_norm": 2.613588104761939, - "learning_rate": 6.469097120702805e-07, - "loss": 0.6542, + "epoch": 0.57, + "grad_norm": 1.6055716688852015, + "learning_rate": 4.1576745571019285e-06, + "loss": 0.4941, "step": 7994 }, { - "epoch": 0.84, - "grad_norm": 2.288884381877685, - "learning_rate": 6.460715487068781e-07, - "loss": 0.5694, + "epoch": 0.57, + "grad_norm": 1.833693387225302, + "learning_rate": 4.156541836722355e-06, + "loss": 0.5381, "step": 7995 }, { - "epoch": 0.84, - "grad_norm": 3.0335170876836783, - "learning_rate": 6.452338911702994e-07, - "loss": 0.7001, + "epoch": 0.57, + "grad_norm": 1.6914610967245645, + "learning_rate": 4.1554091608971e-06, + "loss": 0.567, "step": 7996 }, { - "epoch": 0.84, - "grad_norm": 2.7744780023995363, - "learning_rate": 6.443967395578565e-07, - "loss": 0.6253, + "epoch": 0.57, + "grad_norm": 2.7149015319014764, + "learning_rate": 4.154276529685994e-06, + "loss": 0.5381, "step": 7997 }, { - "epoch": 0.84, - "grad_norm": 2.6347984361148904, - "learning_rate": 6.435600939668096e-07, - "loss": 0.592, + "epoch": 0.57, + "grad_norm": 1.503200814227334, + "learning_rate": 4.153143943148866e-06, + "loss": 0.5392, "step": 7998 }, { - "epoch": 0.84, - "grad_norm": 3.011072253449798, - "learning_rate": 6.42723954494358e-07, - "loss": 0.6553, + "epoch": 0.57, + "grad_norm": 0.6789859940771124, + "learning_rate": 4.152011401345546e-06, + "loss": 0.4247, "step": 7999 }, { - "epoch": 0.84, - "grad_norm": 3.0584864481962364, - "learning_rate": 6.418883212376431e-07, - "loss": 0.5839, + "epoch": 0.57, + "grad_norm": 1.6692305988981593, + "learning_rate": 4.150878904335856e-06, + "loss": 0.5505, "step": 8000 }, { - "epoch": 0.84, - "grad_norm": 2.296221820496963, - "learning_rate": 6.410531942937448e-07, - "loss": 0.5928, + "epoch": 0.57, + "grad_norm": 1.8223993776015004, + "learning_rate": 4.149746452179622e-06, + "loss": 0.5021, "step": 8001 }, { - "epoch": 0.84, - "grad_norm": 2.406097985573007, - "learning_rate": 6.402185737596844e-07, - "loss": 0.6986, + "epoch": 0.57, + "grad_norm": 2.33822253734269, + "learning_rate": 4.148614044936658e-06, + "loss": 0.6324, "step": 8002 }, { - "epoch": 0.84, - "grad_norm": 2.2589114977154625, - "learning_rate": 6.393844597324278e-07, - "loss": 0.5709, + "epoch": 0.57, + "grad_norm": 1.8518221347380979, + "learning_rate": 4.147481682666787e-06, + "loss": 0.5121, "step": 8003 }, { - "epoch": 0.84, - "grad_norm": 2.7836938958633284, - "learning_rate": 6.385508523088801e-07, - "loss": 0.5831, + "epoch": 0.57, + "grad_norm": 1.6939870043935747, + "learning_rate": 4.1463493654298225e-06, + "loss": 0.5259, "step": 8004 }, { - "epoch": 0.84, - "grad_norm": 2.6551153221400194, - "learning_rate": 6.377177515858874e-07, - "loss": 0.6167, + "epoch": 0.57, + "grad_norm": 2.5748630821533043, + "learning_rate": 4.145217093285575e-06, + "loss": 0.5436, "step": 8005 }, { - "epoch": 0.84, - "grad_norm": 2.6555647926872816, - "learning_rate": 6.368851576602347e-07, - "loss": 0.5478, + "epoch": 0.57, + "grad_norm": 4.458857386133799, + "learning_rate": 4.144084866293859e-06, + "loss": 0.5443, "step": 8006 }, { - "epoch": 0.84, - "grad_norm": 3.0795265737705124, - "learning_rate": 6.360530706286516e-07, - "loss": 0.6234, + "epoch": 0.57, + "grad_norm": 1.7540567043355355, + "learning_rate": 4.142952684514481e-06, + "loss": 0.6157, "step": 8007 }, { - "epoch": 0.84, - "grad_norm": 5.01122414638164, - "learning_rate": 6.352214905878085e-07, - "loss": 0.5999, + "epoch": 0.57, + "grad_norm": 1.7308415320151724, + "learning_rate": 4.141820548007246e-06, + "loss": 0.5891, "step": 8008 }, { - "epoch": 0.84, - "grad_norm": 2.8498907965644706, - "learning_rate": 6.343904176343169e-07, - "loss": 0.6705, + "epoch": 0.57, + "grad_norm": 2.7291068312469795, + "learning_rate": 4.1406884568319546e-06, + "loss": 0.5552, "step": 8009 }, { - "epoch": 0.84, - "grad_norm": 2.4166442373218096, - "learning_rate": 6.335598518647251e-07, - "loss": 0.5388, + "epoch": 0.57, + "grad_norm": 1.9027620443646127, + "learning_rate": 4.139556411048414e-06, + "loss": 0.6054, "step": 8010 }, { - "epoch": 0.84, - "grad_norm": 2.6880105299523067, - "learning_rate": 6.327297933755272e-07, - "loss": 0.5941, + "epoch": 0.57, + "grad_norm": 1.8201480121362499, + "learning_rate": 4.13842441071642e-06, + "loss": 0.5291, "step": 8011 }, { - "epoch": 0.84, - "grad_norm": 3.846835922064916, - "learning_rate": 6.319002422631582e-07, - "loss": 0.6664, + "epoch": 0.57, + "grad_norm": 1.729437817952002, + "learning_rate": 4.137292455895767e-06, + "loss": 0.5899, "step": 8012 }, { - "epoch": 0.84, - "grad_norm": 3.031689173749022, - "learning_rate": 6.310711986239926e-07, - "loss": 0.5696, + "epoch": 0.57, + "grad_norm": 1.8041904725409132, + "learning_rate": 4.136160546646247e-06, + "loss": 0.5601, "step": 8013 }, { - "epoch": 0.84, - "grad_norm": 2.73606226733396, - "learning_rate": 6.302426625543457e-07, - "loss": 0.5136, + "epoch": 0.57, + "grad_norm": 1.8064754537516172, + "learning_rate": 4.1350286830276575e-06, + "loss": 0.6171, "step": 8014 }, { - "epoch": 0.84, - "grad_norm": 3.9278610126869204, - "learning_rate": 6.294146341504742e-07, - "loss": 0.6988, + "epoch": 0.57, + "grad_norm": 1.6763890074164498, + "learning_rate": 4.1338968650997825e-06, + "loss": 0.5193, "step": 8015 }, { - "epoch": 0.84, - "grad_norm": 2.6587658045083002, - "learning_rate": 6.285871135085758e-07, - "loss": 0.586, + "epoch": 0.57, + "grad_norm": 1.698283484502184, + "learning_rate": 4.132765092922411e-06, + "loss": 0.5487, "step": 8016 }, { - "epoch": 0.84, - "grad_norm": 2.5101848696418028, - "learning_rate": 6.277601007247913e-07, - "loss": 0.6072, + "epoch": 0.57, + "grad_norm": 1.6544110751633898, + "learning_rate": 4.131633366555323e-06, + "loss": 0.5287, "step": 8017 }, { - "epoch": 0.84, - "grad_norm": 3.094591388241789, - "learning_rate": 6.269335958951995e-07, - "loss": 0.6101, + "epoch": 0.57, + "grad_norm": 1.667581887816826, + "learning_rate": 4.130501686058306e-06, + "loss": 0.5493, "step": 8018 }, { - "epoch": 0.84, - "grad_norm": 2.1190721327022017, - "learning_rate": 6.2610759911582e-07, - "loss": 0.5325, + "epoch": 0.57, + "grad_norm": 1.8048246333346525, + "learning_rate": 4.129370051491135e-06, + "loss": 0.5198, "step": 8019 }, { - "epoch": 0.84, - "grad_norm": 2.394049016837729, - "learning_rate": 6.252821104826163e-07, - "loss": 0.6149, + "epoch": 0.57, + "grad_norm": 1.6511643902526734, + "learning_rate": 4.128238462913587e-06, + "loss": 0.5971, "step": 8020 }, { - "epoch": 0.84, - "grad_norm": 2.185048534616482, - "learning_rate": 6.244571300914909e-07, - "loss": 0.5355, + "epoch": 0.57, + "grad_norm": 1.880776327722236, + "learning_rate": 4.1271069203854385e-06, + "loss": 0.5324, "step": 8021 }, { - "epoch": 0.84, - "grad_norm": 0.9946377288465473, - "learning_rate": 6.23632658038289e-07, - "loss": 0.5187, + "epoch": 0.57, + "grad_norm": 1.7772654767339537, + "learning_rate": 4.12597542396646e-06, + "loss": 0.4887, "step": 8022 }, { - "epoch": 0.84, - "grad_norm": 3.8433532638524928, - "learning_rate": 6.228086944187939e-07, - "loss": 0.6301, + "epoch": 0.57, + "grad_norm": 1.601374787997852, + "learning_rate": 4.124843973716421e-06, + "loss": 0.5045, "step": 8023 }, { - "epoch": 0.84, - "grad_norm": 3.239562343118617, - "learning_rate": 6.219852393287302e-07, - "loss": 0.6108, + "epoch": 0.57, + "grad_norm": 1.5981204700232814, + "learning_rate": 4.123712569695089e-06, + "loss": 0.5738, "step": 8024 }, { - "epoch": 0.84, - "grad_norm": 3.19755867950319, - "learning_rate": 6.211622928637662e-07, - "loss": 0.5238, + "epoch": 0.57, + "grad_norm": 2.3673123814582278, + "learning_rate": 4.1225812119622284e-06, + "loss": 0.5314, "step": 8025 }, { - "epoch": 0.84, - "grad_norm": 2.1376360439152657, - "learning_rate": 6.2033985511951e-07, - "loss": 0.6205, + "epoch": 0.57, + "grad_norm": 2.0268922290080598, + "learning_rate": 4.121449900577602e-06, + "loss": 0.5485, "step": 8026 }, { - "epoch": 0.84, - "grad_norm": 2.563081310031987, - "learning_rate": 6.19517926191509e-07, - "loss": 0.6733, + "epoch": 0.57, + "grad_norm": 1.794710067991761, + "learning_rate": 4.120318635600968e-06, + "loss": 0.5033, "step": 8027 }, { - "epoch": 0.84, - "grad_norm": 3.266713807274647, - "learning_rate": 6.186965061752515e-07, - "loss": 0.5803, + "epoch": 0.57, + "grad_norm": 1.9127477861692541, + "learning_rate": 4.119187417092083e-06, + "loss": 0.5726, "step": 8028 }, { - "epoch": 0.84, - "grad_norm": 2.33107338075643, - "learning_rate": 6.178755951661692e-07, - "loss": 0.542, + "epoch": 0.57, + "grad_norm": 1.8824760470201243, + "learning_rate": 4.1180562451107055e-06, + "loss": 0.4956, "step": 8029 }, { - "epoch": 0.84, - "grad_norm": 2.1249280857293966, - "learning_rate": 6.170551932596336e-07, - "loss": 0.6044, + "epoch": 0.57, + "grad_norm": 1.8198457579585146, + "learning_rate": 4.116925119716585e-06, + "loss": 0.5819, "step": 8030 }, { - "epoch": 0.85, - "grad_norm": 3.222877055640638, - "learning_rate": 6.162353005509558e-07, - "loss": 0.5909, + "epoch": 0.57, + "grad_norm": 0.7391725527955739, + "learning_rate": 4.1157940409694724e-06, + "loss": 0.4431, "step": 8031 }, { - "epoch": 0.85, - "grad_norm": 2.466145340026216, - "learning_rate": 6.154159171353879e-07, - "loss": 0.6029, + "epoch": 0.57, + "grad_norm": 2.2096519678938256, + "learning_rate": 4.114663008929112e-06, + "loss": 0.6086, "step": 8032 }, { - "epoch": 0.85, - "grad_norm": 2.5315597654614805, - "learning_rate": 6.145970431081238e-07, - "loss": 0.5896, + "epoch": 0.57, + "grad_norm": 1.6394877536500403, + "learning_rate": 4.113532023655255e-06, + "loss": 0.4744, "step": 8033 }, { - "epoch": 0.85, - "grad_norm": 2.769252012832659, - "learning_rate": 6.137786785642985e-07, - "loss": 0.6102, + "epoch": 0.57, + "grad_norm": 2.363291213460963, + "learning_rate": 4.112401085207639e-06, + "loss": 0.5859, "step": 8034 }, { - "epoch": 0.85, - "grad_norm": 2.4509615775141977, - "learning_rate": 6.129608235989881e-07, - "loss": 0.5448, + "epoch": 0.57, + "grad_norm": 1.8423611324651257, + "learning_rate": 4.111270193646005e-06, + "loss": 0.5216, "step": 8035 }, { - "epoch": 0.85, - "grad_norm": 2.141998734885848, - "learning_rate": 6.121434783072077e-07, - "loss": 0.6219, + "epoch": 0.57, + "grad_norm": 1.5363841341377968, + "learning_rate": 4.11013934903009e-06, + "loss": 0.5055, "step": 8036 }, { - "epoch": 0.85, - "grad_norm": 2.090304082790666, - "learning_rate": 6.113266427839126e-07, - "loss": 0.5533, + "epoch": 0.57, + "grad_norm": 1.6009461665178966, + "learning_rate": 4.10900855141963e-06, + "loss": 0.5989, "step": 8037 }, { - "epoch": 0.85, - "grad_norm": 2.290451018341766, - "learning_rate": 6.105103171240018e-07, - "loss": 0.6181, + "epoch": 0.57, + "grad_norm": 2.4231981006339036, + "learning_rate": 4.107877800874358e-06, + "loss": 0.4829, "step": 8038 }, { - "epoch": 0.85, - "grad_norm": 5.270211096142163, - "learning_rate": 6.096945014223149e-07, - "loss": 0.614, + "epoch": 0.57, + "grad_norm": 1.5113720019239123, + "learning_rate": 4.106747097454002e-06, + "loss": 0.4563, "step": 8039 }, { - "epoch": 0.85, - "grad_norm": 2.9037913388153433, - "learning_rate": 6.088791957736301e-07, - "loss": 0.6391, + "epoch": 0.57, + "grad_norm": 1.4819132766217822, + "learning_rate": 4.105616441218294e-06, + "loss": 0.4998, "step": 8040 }, { - "epoch": 0.85, - "grad_norm": 2.3997821521809874, - "learning_rate": 6.080644002726655e-07, - "loss": 0.6159, + "epoch": 0.57, + "grad_norm": 2.3483951111166497, + "learning_rate": 4.104485832226955e-06, + "loss": 0.5791, "step": 8041 }, { - "epoch": 0.85, - "grad_norm": 2.436406794174699, - "learning_rate": 6.072501150140824e-07, - "loss": 0.6379, + "epoch": 0.57, + "grad_norm": 2.393313078206014, + "learning_rate": 4.103355270539709e-06, + "loss": 0.5722, "step": 8042 }, { - "epoch": 0.85, - "grad_norm": 3.1984050878329087, - "learning_rate": 6.064363400924839e-07, - "loss": 0.6351, + "epoch": 0.57, + "grad_norm": 1.6954600755388396, + "learning_rate": 4.1022247562162745e-06, + "loss": 0.5307, "step": 8043 }, { - "epoch": 0.85, - "grad_norm": 2.9565521370159096, - "learning_rate": 6.056230756024123e-07, - "loss": 0.5821, + "epoch": 0.57, + "grad_norm": 0.7638012077510357, + "learning_rate": 4.1010942893163706e-06, + "loss": 0.4664, "step": 8044 }, { - "epoch": 0.85, - "grad_norm": 3.4666008560066732, - "learning_rate": 6.048103216383472e-07, - "loss": 0.5522, + "epoch": 0.57, + "grad_norm": 2.157643875769233, + "learning_rate": 4.099963869899713e-06, + "loss": 0.5184, "step": 8045 }, { - "epoch": 0.85, - "grad_norm": 2.3667556756148342, - "learning_rate": 6.03998078294713e-07, - "loss": 0.5512, + "epoch": 0.57, + "grad_norm": 1.6707527628570489, + "learning_rate": 4.098833498026015e-06, + "loss": 0.5773, "step": 8046 }, { - "epoch": 0.85, - "grad_norm": 2.6465130816201254, - "learning_rate": 6.031863456658754e-07, - "loss": 0.5849, + "epoch": 0.57, + "grad_norm": 0.6734425731387184, + "learning_rate": 4.097703173754982e-06, + "loss": 0.4188, "step": 8047 }, { - "epoch": 0.85, - "grad_norm": 3.036938049317216, - "learning_rate": 6.023751238461389e-07, - "loss": 0.5304, + "epoch": 0.57, + "grad_norm": 1.796338518494986, + "learning_rate": 4.096572897146327e-06, + "loss": 0.5777, "step": 8048 }, { - "epoch": 0.85, - "grad_norm": 2.075249746973687, - "learning_rate": 6.015644129297482e-07, - "loss": 0.6031, + "epoch": 0.57, + "grad_norm": 6.461908555341252, + "learning_rate": 4.095442668259753e-06, + "loss": 0.5406, "step": 8049 }, { - "epoch": 0.85, - "grad_norm": 2.8124802810968377, - "learning_rate": 6.007542130108885e-07, - "loss": 0.6553, + "epoch": 0.57, + "grad_norm": 1.6134720463989238, + "learning_rate": 4.0943124871549604e-06, + "loss": 0.5082, "step": 8050 }, { - "epoch": 0.85, - "grad_norm": 6.474131988219833, - "learning_rate": 5.999445241836877e-07, - "loss": 0.5124, + "epoch": 0.57, + "grad_norm": 1.8931649146033012, + "learning_rate": 4.093182353891652e-06, + "loss": 0.5537, "step": 8051 }, { - "epoch": 0.85, - "grad_norm": 2.668817739499102, - "learning_rate": 5.991353465422134e-07, - "loss": 0.5076, + "epoch": 0.57, + "grad_norm": 1.4214439085980473, + "learning_rate": 4.092052268529525e-06, + "loss": 0.5165, "step": 8052 }, { - "epoch": 0.85, - "grad_norm": 2.411857351900381, - "learning_rate": 5.983266801804732e-07, - "loss": 0.6072, + "epoch": 0.57, + "grad_norm": 1.7005718643240582, + "learning_rate": 4.090922231128274e-06, + "loss": 0.5365, "step": 8053 }, { - "epoch": 0.85, - "grad_norm": 2.4264765584589165, - "learning_rate": 5.975185251924143e-07, - "loss": 0.6211, + "epoch": 0.57, + "grad_norm": 0.6762643748941881, + "learning_rate": 4.089792241747588e-06, + "loss": 0.4468, "step": 8054 }, { - "epoch": 0.85, - "grad_norm": 3.102878662708729, - "learning_rate": 5.967108816719264e-07, - "loss": 0.6705, + "epoch": 0.57, + "grad_norm": 2.257922344305473, + "learning_rate": 4.0886623004471635e-06, + "loss": 0.5116, "step": 8055 }, { - "epoch": 0.85, - "grad_norm": 3.401655319504964, - "learning_rate": 5.959037497128401e-07, - "loss": 0.6786, + "epoch": 0.57, + "grad_norm": 1.8853299942945974, + "learning_rate": 4.087532407286684e-06, + "loss": 0.576, "step": 8056 }, { - "epoch": 0.85, - "grad_norm": 3.152666465422462, - "learning_rate": 5.950971294089258e-07, - "loss": 0.6148, + "epoch": 0.57, + "grad_norm": 1.649115283835289, + "learning_rate": 4.086402562325834e-06, + "loss": 0.5085, "step": 8057 }, { - "epoch": 0.85, - "grad_norm": 2.675848160084293, - "learning_rate": 5.942910208538943e-07, - "loss": 0.595, + "epoch": 0.57, + "grad_norm": 1.5154626800660163, + "learning_rate": 4.085272765624295e-06, + "loss": 0.4427, "step": 8058 }, { - "epoch": 0.85, - "grad_norm": 3.2338755878945475, - "learning_rate": 5.934854241413951e-07, - "loss": 0.62, + "epoch": 0.57, + "grad_norm": 1.4809354568421493, + "learning_rate": 4.084143017241749e-06, + "loss": 0.4704, "step": 8059 }, { - "epoch": 0.85, - "grad_norm": 2.7539432304617, - "learning_rate": 5.926803393650215e-07, - "loss": 0.563, + "epoch": 0.57, + "grad_norm": 1.5944575227252882, + "learning_rate": 4.083013317237874e-06, + "loss": 0.5561, "step": 8060 }, { - "epoch": 0.85, - "grad_norm": 2.418072027591026, - "learning_rate": 5.918757666183067e-07, - "loss": 0.5881, + "epoch": 0.57, + "grad_norm": 1.7195858766709167, + "learning_rate": 4.081883665672342e-06, + "loss": 0.6262, "step": 8061 }, { - "epoch": 0.85, - "grad_norm": 2.4209012407142225, - "learning_rate": 5.91071705994723e-07, - "loss": 0.5809, + "epoch": 0.57, + "grad_norm": 2.4424484928984533, + "learning_rate": 4.080754062604823e-06, + "loss": 0.4557, "step": 8062 }, { - "epoch": 0.85, - "grad_norm": 4.302902584346649, - "learning_rate": 5.902681575876822e-07, - "loss": 0.5901, + "epoch": 0.57, + "grad_norm": 2.909367134289205, + "learning_rate": 4.079624508094992e-06, + "loss": 0.4716, "step": 8063 }, { - "epoch": 0.85, - "grad_norm": 2.3794113470801763, - "learning_rate": 5.894651214905395e-07, - "loss": 0.5332, + "epoch": 0.57, + "grad_norm": 1.9352521457830154, + "learning_rate": 4.078495002202512e-06, + "loss": 0.5363, "step": 8064 }, { - "epoch": 0.85, - "grad_norm": 2.4361869128303573, - "learning_rate": 5.88662597796591e-07, - "loss": 0.5329, + "epoch": 0.57, + "grad_norm": 1.5098524782882923, + "learning_rate": 4.077365544987049e-06, + "loss": 0.4904, "step": 8065 }, { - "epoch": 0.85, - "grad_norm": 5.815110075655275, - "learning_rate": 5.878605865990694e-07, - "loss": 0.6081, + "epoch": 0.57, + "grad_norm": 1.7647283251349108, + "learning_rate": 4.076236136508264e-06, + "loss": 0.5635, "step": 8066 }, { - "epoch": 0.85, - "grad_norm": 2.448042678880235, - "learning_rate": 5.870590879911498e-07, - "loss": 0.5438, + "epoch": 0.57, + "grad_norm": 1.7058665116336174, + "learning_rate": 4.075106776825817e-06, + "loss": 0.549, "step": 8067 }, { - "epoch": 0.85, - "grad_norm": 2.6312265037825413, - "learning_rate": 5.862581020659491e-07, - "loss": 0.6526, + "epoch": 0.57, + "grad_norm": 2.264659861014106, + "learning_rate": 4.0739774659993635e-06, + "loss": 0.5458, "step": 8068 }, { - "epoch": 0.85, - "grad_norm": 3.162596770198441, - "learning_rate": 5.854576289165232e-07, - "loss": 0.6465, + "epoch": 0.57, + "grad_norm": 1.5899234927582957, + "learning_rate": 4.072848204088557e-06, + "loss": 0.5528, "step": 8069 }, { - "epoch": 0.85, - "grad_norm": 2.5578744017745305, - "learning_rate": 5.846576686358696e-07, - "loss": 0.6271, + "epoch": 0.57, + "grad_norm": 2.0866235883243163, + "learning_rate": 4.071718991153051e-06, + "loss": 0.5503, "step": 8070 }, { - "epoch": 0.85, - "grad_norm": 2.8100954598723407, - "learning_rate": 5.838582213169247e-07, - "loss": 0.6043, + "epoch": 0.57, + "grad_norm": 2.668717194017669, + "learning_rate": 4.070589827252494e-06, + "loss": 0.6699, "step": 8071 }, { - "epoch": 0.85, - "grad_norm": 3.2170401537564057, - "learning_rate": 5.830592870525647e-07, - "loss": 0.6683, + "epoch": 0.57, + "grad_norm": 1.700772535668077, + "learning_rate": 4.069460712446531e-06, + "loss": 0.4754, "step": 8072 }, { - "epoch": 0.85, - "grad_norm": 2.379729669844667, - "learning_rate": 5.822608659356093e-07, - "loss": 0.5669, + "epoch": 0.57, + "grad_norm": 2.2490471321063645, + "learning_rate": 4.068331646794805e-06, + "loss": 0.6182, "step": 8073 }, { - "epoch": 0.85, - "grad_norm": 2.759833858853163, - "learning_rate": 5.814629580588165e-07, - "loss": 0.679, + "epoch": 0.57, + "grad_norm": 1.5728330563970998, + "learning_rate": 4.067202630356959e-06, + "loss": 0.5103, "step": 8074 }, { - "epoch": 0.85, - "grad_norm": 2.379428037812773, - "learning_rate": 5.80665563514885e-07, - "loss": 0.5477, + "epoch": 0.57, + "grad_norm": 1.5537796211387174, + "learning_rate": 4.066073663192633e-06, + "loss": 0.5098, "step": 8075 }, { - "epoch": 0.85, - "grad_norm": 0.9686713188136623, - "learning_rate": 5.798686823964517e-07, - "loss": 0.5656, + "epoch": 0.57, + "grad_norm": 1.877821791552813, + "learning_rate": 4.064944745361459e-06, + "loss": 0.523, "step": 8076 }, { - "epoch": 0.85, - "grad_norm": 2.208901084034373, - "learning_rate": 5.79072314796098e-07, - "loss": 0.6539, + "epoch": 0.57, + "grad_norm": 2.3091120202092426, + "learning_rate": 4.063815876923071e-06, + "loss": 0.5843, "step": 8077 }, { - "epoch": 0.85, - "grad_norm": 2.319119693164557, - "learning_rate": 5.78276460806343e-07, - "loss": 0.5599, + "epoch": 0.57, + "grad_norm": 1.5654243189348296, + "learning_rate": 4.062687057937102e-06, + "loss": 0.5315, "step": 8078 }, { - "epoch": 0.85, - "grad_norm": 3.486494110324032, - "learning_rate": 5.77481120519649e-07, - "loss": 0.6557, + "epoch": 0.57, + "grad_norm": 0.7455138461808073, + "learning_rate": 4.061558288463179e-06, + "loss": 0.4157, "step": 8079 }, { - "epoch": 0.85, - "grad_norm": 2.356169785828175, - "learning_rate": 5.766862940284124e-07, - "loss": 0.5505, + "epoch": 0.57, + "grad_norm": 2.3270851778288706, + "learning_rate": 4.060429568560926e-06, + "loss": 0.5341, "step": 8080 }, { - "epoch": 0.85, - "grad_norm": 3.308551249984606, - "learning_rate": 5.758919814249753e-07, - "loss": 0.6307, + "epoch": 0.57, + "grad_norm": 3.4657379783177267, + "learning_rate": 4.059300898289966e-06, + "loss": 0.5093, "step": 8081 }, { - "epoch": 0.85, - "grad_norm": 3.1735857620440453, - "learning_rate": 5.750981828016189e-07, - "loss": 0.5625, + "epoch": 0.57, + "grad_norm": 1.6436844312155177, + "learning_rate": 4.058172277709922e-06, + "loss": 0.5495, "step": 8082 }, { - "epoch": 0.85, - "grad_norm": 3.0910390532362695, - "learning_rate": 5.743048982505656e-07, - "loss": 0.5263, + "epoch": 0.57, + "grad_norm": 3.1101261714968746, + "learning_rate": 4.057043706880409e-06, + "loss": 0.5013, "step": 8083 }, { - "epoch": 0.85, - "grad_norm": 3.0386446705294796, - "learning_rate": 5.73512127863976e-07, - "loss": 0.5617, + "epoch": 0.57, + "grad_norm": 2.085097678879066, + "learning_rate": 4.055915185861044e-06, + "loss": 0.5216, "step": 8084 }, { - "epoch": 0.85, - "grad_norm": 2.7279151957638255, - "learning_rate": 5.727198717339511e-07, - "loss": 0.6422, + "epoch": 0.57, + "grad_norm": 1.6786776439011553, + "learning_rate": 4.054786714711434e-06, + "loss": 0.4531, "step": 8085 }, { - "epoch": 0.85, - "grad_norm": 2.677171114462176, - "learning_rate": 5.719281299525331e-07, - "loss": 0.5548, + "epoch": 0.57, + "grad_norm": 1.7166592418849398, + "learning_rate": 4.053658293491196e-06, + "loss": 0.5318, "step": 8086 }, { - "epoch": 0.85, - "grad_norm": 3.209112258602768, - "learning_rate": 5.711369026117053e-07, - "loss": 0.6056, + "epoch": 0.57, + "grad_norm": 1.830603445106854, + "learning_rate": 4.052529922259932e-06, + "loss": 0.5251, "step": 8087 }, { - "epoch": 0.85, - "grad_norm": 2.9912493406236726, - "learning_rate": 5.703461898033902e-07, - "loss": 0.6632, + "epoch": 0.57, + "grad_norm": 1.9615083413640595, + "learning_rate": 4.051401601077249e-06, + "loss": 0.5166, "step": 8088 }, { - "epoch": 0.85, - "grad_norm": 3.8418625332520318, - "learning_rate": 5.695559916194488e-07, - "loss": 0.6912, + "epoch": 0.57, + "grad_norm": 1.903908591338246, + "learning_rate": 4.0502733300027465e-06, + "loss": 0.4891, "step": 8089 }, { - "epoch": 0.85, - "grad_norm": 2.516446973072753, - "learning_rate": 5.687663081516853e-07, - "loss": 0.6293, + "epoch": 0.57, + "grad_norm": 1.8007305972977772, + "learning_rate": 4.049145109096026e-06, + "loss": 0.4775, "step": 8090 }, { - "epoch": 0.85, - "grad_norm": 3.7840898044343776, - "learning_rate": 5.679771394918427e-07, - "loss": 0.5642, + "epoch": 0.57, + "grad_norm": 4.906094688916082, + "learning_rate": 4.048016938416683e-06, + "loss": 0.5598, "step": 8091 }, { - "epoch": 0.85, - "grad_norm": 2.6457572435732244, - "learning_rate": 5.671884857316051e-07, - "loss": 0.559, + "epoch": 0.57, + "grad_norm": 1.7933271907649881, + "learning_rate": 4.046888818024309e-06, + "loss": 0.5456, "step": 8092 }, { - "epoch": 0.85, - "grad_norm": 2.3271517036024933, - "learning_rate": 5.66400346962595e-07, - "loss": 0.7182, + "epoch": 0.57, + "grad_norm": 2.301499846123311, + "learning_rate": 4.045760747978499e-06, + "loss": 0.5433, "step": 8093 }, { - "epoch": 0.85, - "grad_norm": 2.7706715409747082, - "learning_rate": 5.656127232763759e-07, - "loss": 0.6649, + "epoch": 0.57, + "grad_norm": 2.3762873451550433, + "learning_rate": 4.04463272833884e-06, + "loss": 0.4861, "step": 8094 }, { - "epoch": 0.85, - "grad_norm": 2.34603048762334, - "learning_rate": 5.64825614764452e-07, - "loss": 0.6284, + "epoch": 0.57, + "grad_norm": 2.090876187819023, + "learning_rate": 4.043504759164917e-06, + "loss": 0.5752, "step": 8095 }, { - "epoch": 0.85, - "grad_norm": 2.0215689336292253, - "learning_rate": 5.640390215182683e-07, - "loss": 0.6201, + "epoch": 0.57, + "grad_norm": 1.7707101818185091, + "learning_rate": 4.042376840516312e-06, + "loss": 0.5451, "step": 8096 }, { - "epoch": 0.85, - "grad_norm": 9.53100525886137, - "learning_rate": 5.632529436292083e-07, - "loss": 0.5778, + "epoch": 0.57, + "grad_norm": 1.7324969627030422, + "learning_rate": 4.041248972452609e-06, + "loss": 0.5182, "step": 8097 }, { - "epoch": 0.85, - "grad_norm": 2.126583426550784, - "learning_rate": 5.624673811885945e-07, - "loss": 0.61, + "epoch": 0.57, + "grad_norm": 1.9723449710012062, + "learning_rate": 4.040121155033384e-06, + "loss": 0.6016, "step": 8098 }, { - "epoch": 0.85, - "grad_norm": 2.4029139519399867, - "learning_rate": 5.616823342876932e-07, - "loss": 0.5994, + "epoch": 0.57, + "grad_norm": 1.6496725918523965, + "learning_rate": 4.0389933883182124e-06, + "loss": 0.4949, "step": 8099 }, { - "epoch": 0.85, - "grad_norm": 2.3048343587136615, - "learning_rate": 5.608978030177087e-07, - "loss": 0.5854, + "epoch": 0.57, + "grad_norm": 1.6785997983042253, + "learning_rate": 4.037865672366664e-06, + "loss": 0.553, "step": 8100 }, { - "epoch": 0.85, - "grad_norm": 2.276243991774919, - "learning_rate": 5.601137874697859e-07, - "loss": 0.6745, + "epoch": 0.57, + "grad_norm": 0.7051041385239664, + "learning_rate": 4.036738007238314e-06, + "loss": 0.4251, "step": 8101 }, { - "epoch": 0.85, - "grad_norm": 2.5942109454123052, - "learning_rate": 5.593302877350076e-07, - "loss": 0.6314, + "epoch": 0.57, + "grad_norm": 1.6138575894300191, + "learning_rate": 4.035610392992725e-06, + "loss": 0.5814, "step": 8102 }, { - "epoch": 0.85, - "grad_norm": 2.9318122332058856, - "learning_rate": 5.585473039044004e-07, - "loss": 0.5933, + "epoch": 0.58, + "grad_norm": 1.643788208719384, + "learning_rate": 4.0344828296894615e-06, + "loss": 0.487, "step": 8103 }, { - "epoch": 0.85, - "grad_norm": 2.4441412822757806, - "learning_rate": 5.577648360689281e-07, - "loss": 0.6418, + "epoch": 0.58, + "grad_norm": 1.6718021266672007, + "learning_rate": 4.033355317388088e-06, + "loss": 0.5356, "step": 8104 }, { - "epoch": 0.85, - "grad_norm": 4.2034597908955575, - "learning_rate": 5.569828843194969e-07, - "loss": 0.5708, + "epoch": 0.58, + "grad_norm": 1.7762244068186792, + "learning_rate": 4.0322278561481625e-06, + "loss": 0.474, "step": 8105 }, { - "epoch": 0.85, - "grad_norm": 2.6115667573813166, - "learning_rate": 5.562014487469502e-07, - "loss": 0.6829, + "epoch": 0.58, + "grad_norm": 2.1539545801851334, + "learning_rate": 4.031100446029242e-06, + "loss": 0.5352, "step": 8106 }, { - "epoch": 0.85, - "grad_norm": 2.6541755315222266, - "learning_rate": 5.554205294420733e-07, - "loss": 0.5536, + "epoch": 0.58, + "grad_norm": 1.9318287547687025, + "learning_rate": 4.029973087090875e-06, + "loss": 0.5186, "step": 8107 }, { - "epoch": 0.85, - "grad_norm": 2.9752658226736735, - "learning_rate": 5.546401264955909e-07, - "loss": 0.5814, + "epoch": 0.58, + "grad_norm": 2.519152239462843, + "learning_rate": 4.02884577939262e-06, + "loss": 0.5295, "step": 8108 }, { - "epoch": 0.85, - "grad_norm": 3.8346292608939656, - "learning_rate": 5.538602399981696e-07, - "loss": 0.593, + "epoch": 0.58, + "grad_norm": 2.036230396688792, + "learning_rate": 4.027718522994021e-06, + "loss": 0.5519, "step": 8109 }, { - "epoch": 0.85, - "grad_norm": 2.279904270343523, - "learning_rate": 5.530808700404128e-07, - "loss": 0.6239, + "epoch": 0.58, + "grad_norm": 1.775029136828087, + "learning_rate": 4.026591317954623e-06, + "loss": 0.6353, "step": 8110 }, { - "epoch": 0.85, - "grad_norm": 2.4794073490515975, - "learning_rate": 5.523020167128651e-07, - "loss": 0.5705, + "epoch": 0.58, + "grad_norm": 0.7274629567635523, + "learning_rate": 4.025464164333969e-06, + "loss": 0.4375, "step": 8111 }, { - "epoch": 0.85, - "grad_norm": 6.4668680618890395, - "learning_rate": 5.51523680106012e-07, - "loss": 0.6668, + "epoch": 0.58, + "grad_norm": 1.772577413057217, + "learning_rate": 4.024337062191602e-06, + "loss": 0.5479, "step": 8112 }, { - "epoch": 0.85, - "grad_norm": 3.484931444733449, - "learning_rate": 5.507458603102783e-07, - "loss": 0.6922, + "epoch": 0.58, + "grad_norm": 1.6638612791963472, + "learning_rate": 4.023210011587058e-06, + "loss": 0.5023, "step": 8113 }, { - "epoch": 0.85, - "grad_norm": 4.674438977252659, - "learning_rate": 5.499685574160312e-07, - "loss": 0.5529, + "epoch": 0.58, + "grad_norm": 1.7557000247303571, + "learning_rate": 4.022083012579871e-06, + "loss": 0.4865, "step": 8114 }, { - "epoch": 0.85, - "grad_norm": 2.284875736493241, - "learning_rate": 5.491917715135719e-07, - "loss": 0.5868, + "epoch": 0.58, + "grad_norm": 2.137306999101701, + "learning_rate": 4.020956065229571e-06, + "loss": 0.5777, "step": 8115 }, { - "epoch": 0.85, - "grad_norm": 2.901045185802944, - "learning_rate": 5.484155026931459e-07, - "loss": 0.6206, + "epoch": 0.58, + "grad_norm": 2.468007829714127, + "learning_rate": 4.019829169595691e-06, + "loss": 0.5192, "step": 8116 }, { - "epoch": 0.85, - "grad_norm": 2.196587431457302, - "learning_rate": 5.476397510449389e-07, - "loss": 0.6526, + "epoch": 0.58, + "grad_norm": 1.4068318882338702, + "learning_rate": 4.018702325737753e-06, + "loss": 0.4926, "step": 8117 }, { - "epoch": 0.85, - "grad_norm": 3.1539318616200513, - "learning_rate": 5.468645166590758e-07, - "loss": 0.6617, + "epoch": 0.58, + "grad_norm": 0.791820152633847, + "learning_rate": 4.017575533715284e-06, + "loss": 0.4129, "step": 8118 }, { - "epoch": 0.85, - "grad_norm": 2.4155197419898746, - "learning_rate": 5.46089799625621e-07, - "loss": 0.5818, + "epoch": 0.58, + "grad_norm": 1.9208662027326253, + "learning_rate": 4.016448793587806e-06, + "loss": 0.5429, "step": 8119 }, { - "epoch": 0.85, - "grad_norm": 2.6201210125396237, - "learning_rate": 5.453156000345772e-07, - "loss": 0.5792, + "epoch": 0.58, + "grad_norm": 1.6546036421765657, + "learning_rate": 4.015322105414835e-06, + "loss": 0.605, "step": 8120 }, { - "epoch": 0.85, - "grad_norm": 2.457503317771027, - "learning_rate": 5.445419179758893e-07, - "loss": 0.5982, + "epoch": 0.58, + "grad_norm": 1.6696042219875784, + "learning_rate": 4.014195469255886e-06, + "loss": 0.5285, "step": 8121 }, { - "epoch": 0.85, - "grad_norm": 2.551730796116664, - "learning_rate": 5.437687535394431e-07, - "loss": 0.5919, + "epoch": 0.58, + "grad_norm": 1.6481139999575902, + "learning_rate": 4.0130688851704724e-06, + "loss": 0.4958, "step": 8122 }, { - "epoch": 0.85, - "grad_norm": 2.726936420412654, - "learning_rate": 5.429961068150619e-07, - "loss": 0.5739, + "epoch": 0.58, + "grad_norm": 1.5972058152175985, + "learning_rate": 4.011942353218105e-06, + "loss": 0.5184, "step": 8123 }, { - "epoch": 0.85, - "grad_norm": 2.536441146532936, - "learning_rate": 5.422239778925076e-07, - "loss": 0.6811, + "epoch": 0.58, + "grad_norm": 1.806337656381638, + "learning_rate": 4.010815873458291e-06, + "loss": 0.5061, "step": 8124 }, { - "epoch": 0.85, - "grad_norm": 2.9590736750871485, - "learning_rate": 5.414523668614857e-07, - "loss": 0.5417, + "epoch": 0.58, + "grad_norm": 8.512126463333523, + "learning_rate": 4.009689445950534e-06, + "loss": 0.5566, "step": 8125 }, { - "epoch": 0.86, - "grad_norm": 3.7180436990498595, - "learning_rate": 5.406812738116396e-07, - "loss": 0.6244, + "epoch": 0.58, + "grad_norm": 3.7163315869538893, + "learning_rate": 4.008563070754335e-06, + "loss": 0.5338, "step": 8126 }, { - "epoch": 0.86, - "grad_norm": 2.2060907410932096, - "learning_rate": 5.399106988325543e-07, - "loss": 0.5769, + "epoch": 0.58, + "grad_norm": 1.916434813731539, + "learning_rate": 4.0074367479291955e-06, + "loss": 0.5501, "step": 8127 }, { - "epoch": 0.86, - "grad_norm": 2.9386958343089704, - "learning_rate": 5.39140642013749e-07, - "loss": 0.5423, + "epoch": 0.58, + "grad_norm": 1.7272368455457, + "learning_rate": 4.0063104775346115e-06, + "loss": 0.5026, "step": 8128 }, { - "epoch": 0.86, - "grad_norm": 2.5672726543123683, - "learning_rate": 5.383711034446892e-07, - "loss": 0.5791, + "epoch": 0.58, + "grad_norm": 1.9982195733885972, + "learning_rate": 4.005184259630074e-06, + "loss": 0.5044, "step": 8129 }, { - "epoch": 0.86, - "grad_norm": 2.6077162998901873, - "learning_rate": 5.376020832147777e-07, - "loss": 0.6193, + "epoch": 0.58, + "grad_norm": 1.6921723569830935, + "learning_rate": 4.004058094275073e-06, + "loss": 0.5272, "step": 8130 }, { - "epoch": 0.86, - "grad_norm": 3.6993050614435203, - "learning_rate": 5.368335814133569e-07, - "loss": 0.5434, + "epoch": 0.58, + "grad_norm": 1.8178123433534907, + "learning_rate": 4.0029319815291e-06, + "loss": 0.5288, "step": 8131 }, { - "epoch": 0.86, - "grad_norm": 2.2676443993494813, - "learning_rate": 5.360655981297097e-07, - "loss": 0.5061, + "epoch": 0.58, + "grad_norm": 1.6949549207219674, + "learning_rate": 4.001805921451637e-06, + "loss": 0.6009, "step": 8132 }, { - "epoch": 0.86, - "grad_norm": 3.204912156598437, - "learning_rate": 5.352981334530555e-07, - "loss": 0.5548, + "epoch": 0.58, + "grad_norm": 1.522985643926616, + "learning_rate": 4.000679914102168e-06, + "loss": 0.4889, "step": 8133 }, { - "epoch": 0.86, - "grad_norm": 3.3583543994309633, - "learning_rate": 5.345311874725584e-07, - "loss": 0.5469, + "epoch": 0.58, + "grad_norm": 1.9320794635849636, + "learning_rate": 3.9995539595401726e-06, + "loss": 0.547, "step": 8134 }, { - "epoch": 0.86, - "grad_norm": 2.73031391634573, - "learning_rate": 5.337647602773211e-07, - "loss": 0.6302, + "epoch": 0.58, + "grad_norm": 2.7974657650001435, + "learning_rate": 3.998428057825127e-06, + "loss": 0.5689, "step": 8135 }, { - "epoch": 0.86, - "grad_norm": 2.6740573030486434, - "learning_rate": 5.329988519563828e-07, - "loss": 0.5692, + "epoch": 0.58, + "grad_norm": 2.3467777740634563, + "learning_rate": 3.997302209016506e-06, + "loss": 0.5261, "step": 8136 }, { - "epoch": 0.86, - "grad_norm": 1.9923350695353295, - "learning_rate": 5.322334625987241e-07, - "loss": 0.5938, + "epoch": 0.58, + "grad_norm": 1.7439366874576783, + "learning_rate": 3.996176413173777e-06, + "loss": 0.544, "step": 8137 }, { - "epoch": 0.86, - "grad_norm": 2.8706603816339067, - "learning_rate": 5.314685922932666e-07, - "loss": 0.5283, + "epoch": 0.58, + "grad_norm": 1.63155754188709, + "learning_rate": 3.995050670356415e-06, + "loss": 0.5223, "step": 8138 }, { - "epoch": 0.86, - "grad_norm": 2.4156359250535178, - "learning_rate": 5.30704241128871e-07, - "loss": 0.6129, + "epoch": 0.58, + "grad_norm": 1.8198659613526509, + "learning_rate": 3.993924980623879e-06, + "loss": 0.5197, "step": 8139 }, { - "epoch": 0.86, - "grad_norm": 2.789191292597911, - "learning_rate": 5.299404091943383e-07, - "loss": 0.6423, + "epoch": 0.58, + "grad_norm": 1.8403387856611204, + "learning_rate": 3.992799344035637e-06, + "loss": 0.4866, "step": 8140 }, { - "epoch": 0.86, - "grad_norm": 2.3542836512039425, - "learning_rate": 5.291770965784076e-07, - "loss": 0.6534, + "epoch": 0.58, + "grad_norm": 1.776172869877236, + "learning_rate": 3.991673760651144e-06, + "loss": 0.5303, "step": 8141 }, { - "epoch": 0.86, - "grad_norm": 2.6693420950157534, - "learning_rate": 5.284143033697565e-07, - "loss": 0.5847, + "epoch": 0.58, + "grad_norm": 1.6745295431735645, + "learning_rate": 3.990548230529861e-06, + "loss": 0.5725, "step": 8142 }, { - "epoch": 0.86, - "grad_norm": 2.4801931250434497, - "learning_rate": 5.276520296570053e-07, - "loss": 0.6162, + "epoch": 0.58, + "grad_norm": 2.1786269000598724, + "learning_rate": 3.989422753731241e-06, + "loss": 0.5042, "step": 8143 }, { - "epoch": 0.86, - "grad_norm": 2.6330298094643676, - "learning_rate": 5.268902755287148e-07, - "loss": 0.5991, + "epoch": 0.58, + "grad_norm": 1.7414140998266472, + "learning_rate": 3.988297330314736e-06, + "loss": 0.5051, "step": 8144 }, { - "epoch": 0.86, - "grad_norm": 2.514352693873068, - "learning_rate": 5.26129041073381e-07, - "loss": 0.5545, + "epoch": 0.58, + "grad_norm": 1.6939850196508066, + "learning_rate": 3.987171960339792e-06, + "loss": 0.4828, "step": 8145 }, { - "epoch": 0.86, - "grad_norm": 2.334792126153137, - "learning_rate": 5.253683263794418e-07, - "loss": 0.5464, + "epoch": 0.58, + "grad_norm": 1.674751757443478, + "learning_rate": 3.9860466438658595e-06, + "loss": 0.5064, "step": 8146 }, { - "epoch": 0.86, - "grad_norm": 2.269857305345398, - "learning_rate": 5.246081315352758e-07, - "loss": 0.6648, + "epoch": 0.58, + "grad_norm": 2.4831467691195357, + "learning_rate": 3.9849213809523765e-06, + "loss": 0.5542, "step": 8147 }, { - "epoch": 0.86, - "grad_norm": 2.5865095430846656, - "learning_rate": 5.238484566292002e-07, - "loss": 0.7269, + "epoch": 0.58, + "grad_norm": 0.7430506226696686, + "learning_rate": 3.983796171658788e-06, + "loss": 0.4285, "step": 8148 }, { - "epoch": 0.86, - "grad_norm": 2.4282223527075573, - "learning_rate": 5.230893017494731e-07, - "loss": 0.5743, + "epoch": 0.58, + "grad_norm": 1.8957236935076944, + "learning_rate": 3.982671016044527e-06, + "loss": 0.4519, "step": 8149 }, { - "epoch": 0.86, - "grad_norm": 1.0285336151160749, - "learning_rate": 5.223306669842876e-07, - "loss": 0.5332, + "epoch": 0.58, + "grad_norm": 3.2653349339332935, + "learning_rate": 3.981545914169032e-06, + "loss": 0.522, "step": 8150 }, { - "epoch": 0.86, - "grad_norm": 2.5241337376310526, - "learning_rate": 5.215725524217818e-07, - "loss": 0.555, + "epoch": 0.58, + "grad_norm": 2.028449451471558, + "learning_rate": 3.980420866091733e-06, + "loss": 0.5664, "step": 8151 }, { - "epoch": 0.86, - "grad_norm": 3.159756791738227, - "learning_rate": 5.20814958150031e-07, - "loss": 0.6031, + "epoch": 0.58, + "grad_norm": 1.854547175774253, + "learning_rate": 3.979295871872055e-06, + "loss": 0.5812, "step": 8152 }, { - "epoch": 0.86, - "grad_norm": 2.682488901407614, - "learning_rate": 5.200578842570508e-07, - "loss": 0.5975, + "epoch": 0.58, + "grad_norm": 1.7052195584115089, + "learning_rate": 3.9781709315694314e-06, + "loss": 0.5095, "step": 8153 }, { - "epoch": 0.86, - "grad_norm": 0.9504110630710011, - "learning_rate": 5.19301330830796e-07, - "loss": 0.559, + "epoch": 0.58, + "grad_norm": 0.7206811074093877, + "learning_rate": 3.97704604524328e-06, + "loss": 0.4632, "step": 8154 }, { - "epoch": 0.86, - "grad_norm": 2.283295404157167, - "learning_rate": 5.185452979591593e-07, - "loss": 0.647, + "epoch": 0.58, + "grad_norm": 2.0629584482682106, + "learning_rate": 3.9759212129530245e-06, + "loss": 0.5383, "step": 8155 }, { - "epoch": 0.86, - "grad_norm": 2.895291731557172, - "learning_rate": 5.177897857299752e-07, - "loss": 0.5969, + "epoch": 0.58, + "grad_norm": 1.8683143696109235, + "learning_rate": 3.974796434758078e-06, + "loss": 0.4816, "step": 8156 }, { - "epoch": 0.86, - "grad_norm": 2.0204477401700256, - "learning_rate": 5.170347942310177e-07, - "loss": 0.4857, + "epoch": 0.58, + "grad_norm": 2.165844621628089, + "learning_rate": 3.973671710717859e-06, + "loss": 0.5194, "step": 8157 }, { - "epoch": 0.86, - "grad_norm": 3.281207098958877, - "learning_rate": 5.162803235499992e-07, - "loss": 0.5998, + "epoch": 0.58, + "grad_norm": 2.0767247692516224, + "learning_rate": 3.972547040891779e-06, + "loss": 0.5272, "step": 8158 }, { - "epoch": 0.86, - "grad_norm": 2.9244566758863595, - "learning_rate": 5.155263737745703e-07, - "loss": 0.5635, + "epoch": 0.58, + "grad_norm": 2.001089386988167, + "learning_rate": 3.971422425339246e-06, + "loss": 0.5295, "step": 8159 }, { - "epoch": 0.86, - "grad_norm": 2.0701940095232954, - "learning_rate": 5.147729449923244e-07, - "loss": 0.5843, + "epoch": 0.58, + "grad_norm": 1.668943805513631, + "learning_rate": 3.9702978641196635e-06, + "loss": 0.5817, "step": 8160 }, { - "epoch": 0.86, - "grad_norm": 2.5067250985588987, - "learning_rate": 5.140200372907921e-07, - "loss": 0.6793, + "epoch": 0.58, + "grad_norm": 19.973158136073252, + "learning_rate": 3.969173357292439e-06, + "loss": 0.5272, "step": 8161 }, { - "epoch": 0.86, - "grad_norm": 3.105940578606609, - "learning_rate": 5.132676507574463e-07, - "loss": 0.6469, + "epoch": 0.58, + "grad_norm": 1.8432594953999082, + "learning_rate": 3.9680489049169685e-06, + "loss": 0.5652, "step": 8162 }, { - "epoch": 0.86, - "grad_norm": 3.241058072116501, - "learning_rate": 5.125157854796925e-07, - "loss": 0.4969, + "epoch": 0.58, + "grad_norm": 1.7451871406071668, + "learning_rate": 3.966924507052653e-06, + "loss": 0.5328, "step": 8163 }, { - "epoch": 0.86, - "grad_norm": 2.792486187126245, - "learning_rate": 5.11764441544883e-07, - "loss": 0.6391, + "epoch": 0.58, + "grad_norm": 1.7504238234083846, + "learning_rate": 3.965800163758885e-06, + "loss": 0.5325, "step": 8164 }, { - "epoch": 0.86, - "grad_norm": 2.829466955623266, - "learning_rate": 5.11013619040307e-07, - "loss": 0.7384, + "epoch": 0.58, + "grad_norm": 2.1619975966320832, + "learning_rate": 3.964675875095057e-06, + "loss": 0.5021, "step": 8165 }, { - "epoch": 0.86, - "grad_norm": 2.4147912110071825, - "learning_rate": 5.10263318053193e-07, - "loss": 0.6223, + "epoch": 0.58, + "grad_norm": 1.5300654296760539, + "learning_rate": 3.963551641120558e-06, + "loss": 0.5181, "step": 8166 }, { - "epoch": 0.86, - "grad_norm": 2.362586700546461, - "learning_rate": 5.095135386707084e-07, - "loss": 0.588, + "epoch": 0.58, + "grad_norm": 1.950595505467225, + "learning_rate": 3.962427461894772e-06, + "loss": 0.5532, "step": 8167 }, { - "epoch": 0.86, - "grad_norm": 4.445979909568467, - "learning_rate": 5.087642809799587e-07, - "loss": 0.6189, + "epoch": 0.58, + "grad_norm": 1.6765478622562457, + "learning_rate": 3.961303337477084e-06, + "loss": 0.4991, "step": 8168 }, { - "epoch": 0.86, - "grad_norm": 2.732920831150975, - "learning_rate": 5.080155450679924e-07, - "loss": 0.5372, + "epoch": 0.58, + "grad_norm": 1.5569721997780555, + "learning_rate": 3.960179267926873e-06, + "loss": 0.5752, "step": 8169 }, { - "epoch": 0.86, - "grad_norm": 2.705560498251637, - "learning_rate": 5.072673310217957e-07, - "loss": 0.6203, + "epoch": 0.58, + "grad_norm": 1.6397940331506433, + "learning_rate": 3.9590552533035174e-06, + "loss": 0.5541, "step": 8170 }, { - "epoch": 0.86, - "grad_norm": 2.31980030969563, - "learning_rate": 5.065196389282939e-07, - "loss": 0.5589, + "epoch": 0.58, + "grad_norm": 2.017072248336799, + "learning_rate": 3.9579312936663885e-06, + "loss": 0.5682, "step": 8171 }, { - "epoch": 0.86, - "grad_norm": 2.5656538295462887, - "learning_rate": 5.057724688743498e-07, - "loss": 0.6339, + "epoch": 0.58, + "grad_norm": 1.828911044934909, + "learning_rate": 3.956807389074861e-06, + "loss": 0.5684, "step": 8172 }, { - "epoch": 0.86, - "grad_norm": 2.6519050302952736, - "learning_rate": 5.050258209467684e-07, - "loss": 0.7224, + "epoch": 0.58, + "grad_norm": 1.717498185538073, + "learning_rate": 3.955683539588304e-06, + "loss": 0.5439, "step": 8173 }, { - "epoch": 0.86, - "grad_norm": 2.591412532091134, - "learning_rate": 5.042796952322943e-07, - "loss": 0.5866, + "epoch": 0.58, + "grad_norm": 2.0828741983352073, + "learning_rate": 3.9545597452660795e-06, + "loss": 0.541, "step": 8174 }, { - "epoch": 0.86, - "grad_norm": 3.212827161480501, - "learning_rate": 5.035340918176096e-07, - "loss": 0.7674, + "epoch": 0.58, + "grad_norm": 1.9655673268437812, + "learning_rate": 3.9534360061675496e-06, + "loss": 0.5525, "step": 8175 }, { - "epoch": 0.86, - "grad_norm": 2.487115162626594, - "learning_rate": 5.027890107893368e-07, - "loss": 0.6352, + "epoch": 0.58, + "grad_norm": 1.869580758657847, + "learning_rate": 3.9523123223520776e-06, + "loss": 0.5362, "step": 8176 }, { - "epoch": 0.86, - "grad_norm": 2.360221538239416, - "learning_rate": 5.020444522340351e-07, - "loss": 0.5827, + "epoch": 0.58, + "grad_norm": 1.5617350279032653, + "learning_rate": 3.95118869387902e-06, + "loss": 0.5245, "step": 8177 }, { - "epoch": 0.86, - "grad_norm": 7.6272603981979445, - "learning_rate": 5.013004162382068e-07, - "loss": 0.5703, + "epoch": 0.58, + "grad_norm": 1.6893161331631614, + "learning_rate": 3.950065120807729e-06, + "loss": 0.4954, "step": 8178 }, { - "epoch": 0.86, - "grad_norm": 2.297902416786526, - "learning_rate": 5.005569028882928e-07, - "loss": 0.6296, + "epoch": 0.58, + "grad_norm": 2.0611459020755665, + "learning_rate": 3.948941603197553e-06, + "loss": 0.5281, "step": 8179 }, { - "epoch": 0.86, - "grad_norm": 2.272681457850102, - "learning_rate": 4.998139122706713e-07, - "loss": 0.6528, + "epoch": 0.58, + "grad_norm": 1.9700537578602089, + "learning_rate": 3.947818141107847e-06, + "loss": 0.5482, "step": 8180 }, { - "epoch": 0.86, - "grad_norm": 2.8422597029287604, - "learning_rate": 4.990714444716594e-07, - "loss": 0.6148, + "epoch": 0.58, + "grad_norm": 1.6940654205306918, + "learning_rate": 3.9466947345979515e-06, + "loss": 0.5552, "step": 8181 }, { - "epoch": 0.86, - "grad_norm": 2.645227694382434, - "learning_rate": 4.983294995775167e-07, - "loss": 0.6103, + "epoch": 0.58, + "grad_norm": 1.7607093986381808, + "learning_rate": 3.945571383727207e-06, + "loss": 0.5315, "step": 8182 }, { - "epoch": 0.86, - "grad_norm": 2.283592322689147, - "learning_rate": 4.975880776744397e-07, - "loss": 0.5401, + "epoch": 0.58, + "grad_norm": 1.887031920927936, + "learning_rate": 3.944448088554956e-06, + "loss": 0.5623, "step": 8183 }, { - "epoch": 0.86, - "grad_norm": 2.1952627320177975, - "learning_rate": 4.968471788485663e-07, - "loss": 0.5755, + "epoch": 0.58, + "grad_norm": 3.416858777557016, + "learning_rate": 3.943324849140534e-06, + "loss": 0.5101, "step": 8184 }, { - "epoch": 0.86, - "grad_norm": 2.4356967067289714, - "learning_rate": 4.961068031859684e-07, - "loss": 0.6096, + "epoch": 0.58, + "grad_norm": 2.0095193645522076, + "learning_rate": 3.942201665543274e-06, + "loss": 0.5346, "step": 8185 }, { - "epoch": 0.86, - "grad_norm": 2.8567686330899567, - "learning_rate": 4.953669507726633e-07, - "loss": 0.6889, + "epoch": 0.58, + "grad_norm": 1.9876052903800276, + "learning_rate": 3.941078537822504e-06, + "loss": 0.53, "step": 8186 }, { - "epoch": 0.86, - "grad_norm": 3.5528236481312305, - "learning_rate": 4.946276216946034e-07, - "loss": 0.6052, + "epoch": 0.58, + "grad_norm": 1.5162414856273319, + "learning_rate": 3.939955466037557e-06, + "loss": 0.511, "step": 8187 }, { - "epoch": 0.86, - "grad_norm": 3.0251800181776676, - "learning_rate": 4.938888160376842e-07, - "loss": 0.6357, + "epoch": 0.58, + "grad_norm": 1.853874385873753, + "learning_rate": 3.938832450247754e-06, + "loss": 0.5056, "step": 8188 }, { - "epoch": 0.86, - "grad_norm": 3.306752775047198, - "learning_rate": 4.931505338877363e-07, - "loss": 0.4966, + "epoch": 0.58, + "grad_norm": 1.9190621700564683, + "learning_rate": 3.937709490512417e-06, + "loss": 0.4809, "step": 8189 }, { - "epoch": 0.86, - "grad_norm": 3.467733910306402, - "learning_rate": 4.924127753305308e-07, - "loss": 0.5819, + "epoch": 0.58, + "grad_norm": 2.394459428968313, + "learning_rate": 3.936586586890861e-06, + "loss": 0.5567, "step": 8190 }, { - "epoch": 0.86, - "grad_norm": 2.5766576901577807, - "learning_rate": 4.916755404517787e-07, - "loss": 0.6238, + "epoch": 0.58, + "grad_norm": 1.8494934540255525, + "learning_rate": 3.935463739442406e-06, + "loss": 0.5695, "step": 8191 }, { - "epoch": 0.86, - "grad_norm": 6.477146410617243, - "learning_rate": 4.909388293371309e-07, - "loss": 0.5792, + "epoch": 0.58, + "grad_norm": 1.8509905966360063, + "learning_rate": 3.934340948226365e-06, + "loss": 0.5252, "step": 8192 }, { - "epoch": 0.86, - "grad_norm": 2.725439865413354, - "learning_rate": 4.902026420721756e-07, - "loss": 0.589, + "epoch": 0.58, + "grad_norm": 1.976052163462318, + "learning_rate": 3.933218213302047e-06, + "loss": 0.6213, "step": 8193 }, { - "epoch": 0.86, - "grad_norm": 3.1642328795374106, - "learning_rate": 4.894669787424399e-07, - "loss": 0.5731, + "epoch": 0.58, + "grad_norm": 2.01953877815824, + "learning_rate": 3.932095534728753e-06, + "loss": 0.5588, "step": 8194 }, { - "epoch": 0.86, - "grad_norm": 2.7228885313758933, - "learning_rate": 4.887318394333923e-07, - "loss": 0.6069, + "epoch": 0.58, + "grad_norm": 1.8316037971404404, + "learning_rate": 3.930972912565796e-06, + "loss": 0.5666, "step": 8195 }, { - "epoch": 0.86, - "grad_norm": 4.187726267604148, - "learning_rate": 4.879972242304382e-07, - "loss": 0.5925, + "epoch": 0.58, + "grad_norm": 2.0456936604664615, + "learning_rate": 3.929850346872471e-06, + "loss": 0.513, "step": 8196 }, { - "epoch": 0.86, - "grad_norm": 2.4064432279219896, - "learning_rate": 4.872631332189259e-07, - "loss": 0.6251, + "epoch": 0.58, + "grad_norm": 2.06454160468138, + "learning_rate": 3.928727837708077e-06, + "loss": 0.5466, "step": 8197 }, { - "epoch": 0.86, - "grad_norm": 2.0589344787984993, - "learning_rate": 4.865295664841363e-07, - "loss": 0.5597, + "epoch": 0.58, + "grad_norm": 3.154364225608142, + "learning_rate": 3.927605385131907e-06, + "loss": 0.4866, "step": 8198 }, { - "epoch": 0.86, - "grad_norm": 2.3406278362981645, - "learning_rate": 4.857965241112938e-07, - "loss": 0.5722, + "epoch": 0.58, + "grad_norm": 1.9420290084749225, + "learning_rate": 3.9264829892032545e-06, + "loss": 0.5752, "step": 8199 }, { - "epoch": 0.86, - "grad_norm": 3.988444067366749, - "learning_rate": 4.850640061855627e-07, - "loss": 0.6008, + "epoch": 0.58, + "grad_norm": 1.870452377406962, + "learning_rate": 3.925360649981409e-06, + "loss": 0.5503, "step": 8200 }, { - "epoch": 0.86, - "grad_norm": 2.7834751537926765, - "learning_rate": 4.843320127920442e-07, - "loss": 0.5944, + "epoch": 0.58, + "grad_norm": 2.458021597221019, + "learning_rate": 3.924238367525655e-06, + "loss": 0.5456, "step": 8201 }, { - "epoch": 0.86, - "grad_norm": 2.5609186141419173, - "learning_rate": 4.836005440157798e-07, - "loss": 0.6557, + "epoch": 0.58, + "grad_norm": 2.1538717962533833, + "learning_rate": 3.9231161418952755e-06, + "loss": 0.5109, "step": 8202 }, { - "epoch": 0.86, - "grad_norm": 2.3038313600115945, - "learning_rate": 4.828695999417471e-07, - "loss": 0.6159, + "epoch": 0.58, + "grad_norm": 1.8092056277977968, + "learning_rate": 3.921993973149551e-06, + "loss": 0.5592, "step": 8203 }, { - "epoch": 0.86, - "grad_norm": 2.3290658242492674, - "learning_rate": 4.821391806548664e-07, - "loss": 0.6091, + "epoch": 0.58, + "grad_norm": 1.9580695054749024, + "learning_rate": 3.920871861347757e-06, + "loss": 0.5331, "step": 8204 }, { - "epoch": 0.86, - "grad_norm": 2.7246422374040185, - "learning_rate": 4.814092862399971e-07, - "loss": 0.6781, + "epoch": 0.58, + "grad_norm": 2.786130902803287, + "learning_rate": 3.919749806549168e-06, + "loss": 0.5377, "step": 8205 }, { - "epoch": 0.86, - "grad_norm": 2.543683504573766, - "learning_rate": 4.806799167819354e-07, - "loss": 0.5268, + "epoch": 0.58, + "grad_norm": 2.07062162550016, + "learning_rate": 3.918627808813054e-06, + "loss": 0.5918, "step": 8206 }, { - "epoch": 0.86, - "grad_norm": 2.248828186134925, - "learning_rate": 4.799510723654154e-07, - "loss": 0.5292, + "epoch": 0.58, + "grad_norm": 1.95724028687109, + "learning_rate": 3.917505868198686e-06, + "loss": 0.5238, "step": 8207 }, { - "epoch": 0.86, - "grad_norm": 3.487968354722907, - "learning_rate": 4.792227530751137e-07, - "loss": 0.618, + "epoch": 0.58, + "grad_norm": 1.5567314298832478, + "learning_rate": 3.916383984765324e-06, + "loss": 0.4847, "step": 8208 }, { - "epoch": 0.86, - "grad_norm": 3.112110087489705, - "learning_rate": 4.784949589956444e-07, - "loss": 0.6297, + "epoch": 0.58, + "grad_norm": 2.9481423916673912, + "learning_rate": 3.915262158572231e-06, + "loss": 0.57, "step": 8209 }, { - "epoch": 0.86, - "grad_norm": 3.553846046938456, - "learning_rate": 4.777676902115613e-07, - "loss": 0.6159, + "epoch": 0.58, + "grad_norm": 0.7676161373272865, + "learning_rate": 3.914140389678668e-06, + "loss": 0.4419, "step": 8210 }, { - "epoch": 0.86, - "grad_norm": 2.8837009978624635, - "learning_rate": 4.770409468073562e-07, - "loss": 0.6905, + "epoch": 0.58, + "grad_norm": 2.1262314672643328, + "learning_rate": 3.913018678143889e-06, + "loss": 0.5097, "step": 8211 }, { - "epoch": 0.86, - "grad_norm": 3.1032584634222546, - "learning_rate": 4.7631472886745746e-07, - "loss": 0.6244, + "epoch": 0.58, + "grad_norm": 2.5121900912802557, + "learning_rate": 3.911897024027145e-06, + "loss": 0.4887, "step": 8212 }, { - "epoch": 0.86, - "grad_norm": 2.367855236507304, - "learning_rate": 4.755890364762372e-07, - "loss": 0.5847, + "epoch": 0.58, + "grad_norm": 3.275970860385759, + "learning_rate": 3.910775427387688e-06, + "loss": 0.5552, "step": 8213 }, { - "epoch": 0.86, - "grad_norm": 0.9490535768407505, - "learning_rate": 4.748638697180052e-07, - "loss": 0.6026, + "epoch": 0.58, + "grad_norm": 2.1275808481834746, + "learning_rate": 3.909653888284763e-06, + "loss": 0.5601, "step": 8214 }, { - "epoch": 0.86, - "grad_norm": 3.6375758139751895, - "learning_rate": 4.741392286770075e-07, - "loss": 0.5925, + "epoch": 0.58, + "grad_norm": 1.6880901128399293, + "learning_rate": 3.908532406777615e-06, + "loss": 0.5209, "step": 8215 }, { - "epoch": 0.86, - "grad_norm": 2.8159200769119366, - "learning_rate": 4.734151134374304e-07, - "loss": 0.5985, + "epoch": 0.58, + "grad_norm": 1.6858984368705028, + "learning_rate": 3.907410982925483e-06, + "loss": 0.5146, "step": 8216 }, { - "epoch": 0.86, - "grad_norm": 3.1013992053905923, - "learning_rate": 4.7269152408340067e-07, - "loss": 0.718, + "epoch": 0.58, + "grad_norm": 1.5987749931612856, + "learning_rate": 3.906289616787607e-06, + "loss": 0.5245, "step": 8217 }, { - "epoch": 0.86, - "grad_norm": 2.2854215939813325, - "learning_rate": 4.7196846069898216e-07, - "loss": 0.5956, + "epoch": 0.58, + "grad_norm": 1.7412792848397713, + "learning_rate": 3.9051683084232184e-06, + "loss": 0.5087, "step": 8218 }, { - "epoch": 0.86, - "grad_norm": 2.3465869398994483, - "learning_rate": 4.71245923368181e-07, - "loss": 0.535, + "epoch": 0.58, + "grad_norm": 1.661788750501398, + "learning_rate": 3.904047057891548e-06, + "loss": 0.5032, "step": 8219 }, { - "epoch": 0.86, - "grad_norm": 2.521050340647012, - "learning_rate": 4.7052391217493497e-07, - "loss": 0.596, + "epoch": 0.58, + "grad_norm": 1.6137403853968162, + "learning_rate": 3.902925865251827e-06, + "loss": 0.5081, "step": 8220 }, { - "epoch": 0.87, - "grad_norm": 2.4595843214175392, - "learning_rate": 4.698024272031276e-07, - "loss": 0.5647, + "epoch": 0.58, + "grad_norm": 1.7224014946367976, + "learning_rate": 3.901804730563279e-06, + "loss": 0.5126, "step": 8221 }, { - "epoch": 0.87, - "grad_norm": 3.029712654205638, - "learning_rate": 4.690814685365791e-07, - "loss": 0.6448, + "epoch": 0.58, + "grad_norm": 0.7067455959940929, + "learning_rate": 3.900683653885127e-06, + "loss": 0.4342, "step": 8222 }, { - "epoch": 0.87, - "grad_norm": 2.341790414077542, - "learning_rate": 4.683610362590485e-07, - "loss": 0.5764, + "epoch": 0.58, + "grad_norm": 2.1977703535211193, + "learning_rate": 3.899562635276589e-06, + "loss": 0.5098, "step": 8223 }, { - "epoch": 0.87, - "grad_norm": 4.5428852112483105, - "learning_rate": 4.6764113045423274e-07, - "loss": 0.6887, + "epoch": 0.58, + "grad_norm": 2.694149771663255, + "learning_rate": 3.89844167479688e-06, + "loss": 0.5225, "step": 8224 }, { - "epoch": 0.87, - "grad_norm": 2.1809278528567937, - "learning_rate": 4.6692175120576834e-07, - "loss": 0.6309, + "epoch": 0.58, + "grad_norm": 1.6586112243199327, + "learning_rate": 3.8973207725052165e-06, + "loss": 0.5217, "step": 8225 }, { - "epoch": 0.87, - "grad_norm": 2.412349436289653, - "learning_rate": 4.6620289859723114e-07, - "loss": 0.583, + "epoch": 0.58, + "grad_norm": 1.958244641641531, + "learning_rate": 3.896199928460806e-06, + "loss": 0.4828, "step": 8226 }, { - "epoch": 0.87, - "grad_norm": 2.6851844120083275, - "learning_rate": 4.65484572712136e-07, - "loss": 0.5324, + "epoch": 0.58, + "grad_norm": 2.1756423610665876, + "learning_rate": 3.895079142722854e-06, + "loss": 0.524, "step": 8227 }, { - "epoch": 0.87, - "grad_norm": 2.350365679203484, - "learning_rate": 4.6476677363393507e-07, - "loss": 0.6305, + "epoch": 0.58, + "grad_norm": 1.8397552889569622, + "learning_rate": 3.893958415350565e-06, + "loss": 0.5664, "step": 8228 }, { - "epoch": 0.87, - "grad_norm": 2.74850582630496, - "learning_rate": 4.6404950144602e-07, - "loss": 0.6442, + "epoch": 0.58, + "grad_norm": 2.477548116569571, + "learning_rate": 3.8928377464031425e-06, + "loss": 0.5446, "step": 8229 }, { - "epoch": 0.87, - "grad_norm": 2.48312204654485, - "learning_rate": 4.6333275623172137e-07, - "loss": 0.6308, + "epoch": 0.58, + "grad_norm": 1.5011808080842939, + "learning_rate": 3.891717135939782e-06, + "loss": 0.4866, "step": 8230 }, { - "epoch": 0.87, - "grad_norm": 3.447477016671054, - "learning_rate": 4.626165380743086e-07, - "loss": 0.5315, + "epoch": 0.58, + "grad_norm": 1.7289705967649056, + "learning_rate": 3.890596584019675e-06, + "loss": 0.4996, "step": 8231 }, { - "epoch": 0.87, - "grad_norm": 2.6758503396323916, - "learning_rate": 4.6190084705699243e-07, - "loss": 0.6075, + "epoch": 0.58, + "grad_norm": 2.1823279267558355, + "learning_rate": 3.889476090702018e-06, + "loss": 0.607, "step": 8232 }, { - "epoch": 0.87, - "grad_norm": 2.7690378845704013, - "learning_rate": 4.6118568326291577e-07, - "loss": 0.5348, + "epoch": 0.58, + "grad_norm": 1.6457441889339104, + "learning_rate": 3.888355656045996e-06, + "loss": 0.5286, "step": 8233 }, { - "epoch": 0.87, - "grad_norm": 3.288950693928659, - "learning_rate": 4.604710467751661e-07, - "loss": 0.5712, + "epoch": 0.58, + "grad_norm": 2.071570565043589, + "learning_rate": 3.887235280110795e-06, + "loss": 0.5699, "step": 8234 }, { - "epoch": 0.87, - "grad_norm": 2.882335822482473, - "learning_rate": 4.5975693767676746e-07, - "loss": 0.5979, + "epoch": 0.58, + "grad_norm": 1.934879707710038, + "learning_rate": 3.886114962955597e-06, + "loss": 0.4871, "step": 8235 }, { - "epoch": 0.87, - "grad_norm": 3.0747373794610313, - "learning_rate": 4.590433560506841e-07, - "loss": 0.6, + "epoch": 0.58, + "grad_norm": 2.235639303660988, + "learning_rate": 3.8849947046395805e-06, + "loss": 0.5304, "step": 8236 }, { - "epoch": 0.87, - "grad_norm": 2.6607131094292322, - "learning_rate": 4.583303019798174e-07, - "loss": 0.6083, + "epoch": 0.58, + "grad_norm": 1.506300190173808, + "learning_rate": 3.883874505221924e-06, + "loss": 0.5742, "step": 8237 }, { - "epoch": 0.87, - "grad_norm": 3.6597203389257147, - "learning_rate": 4.576177755470068e-07, - "loss": 0.6753, + "epoch": 0.58, + "grad_norm": 1.9653894861772, + "learning_rate": 3.882754364761797e-06, + "loss": 0.5572, "step": 8238 }, { - "epoch": 0.87, - "grad_norm": 3.4666690631206585, - "learning_rate": 4.5690577683503214e-07, - "loss": 0.5823, + "epoch": 0.58, + "grad_norm": 0.6594160132622261, + "learning_rate": 3.8816342833183685e-06, + "loss": 0.4012, "step": 8239 }, { - "epoch": 0.87, - "grad_norm": 2.426851608730652, - "learning_rate": 4.561943059266122e-07, - "loss": 0.6137, + "epoch": 0.58, + "grad_norm": 1.6913808797206589, + "learning_rate": 3.880514260950809e-06, + "loss": 0.5427, "step": 8240 }, { - "epoch": 0.87, - "grad_norm": 3.5313147718873426, - "learning_rate": 4.554833629044031e-07, - "loss": 0.5988, + "epoch": 0.58, + "grad_norm": 1.5088933318397555, + "learning_rate": 3.879394297718279e-06, + "loss": 0.5497, "step": 8241 }, { - "epoch": 0.87, - "grad_norm": 2.4878565766187477, - "learning_rate": 4.547729478509993e-07, - "loss": 0.6371, + "epoch": 0.58, + "grad_norm": 1.6910703261078577, + "learning_rate": 3.878274393679939e-06, + "loss": 0.4862, "step": 8242 }, { - "epoch": 0.87, - "grad_norm": 12.485806184499575, - "learning_rate": 4.540630608489355e-07, - "loss": 0.5358, + "epoch": 0.58, + "grad_norm": 1.5936672063997535, + "learning_rate": 3.877154548894944e-06, + "loss": 0.5143, "step": 8243 }, { - "epoch": 0.87, - "grad_norm": 2.51891715467682, - "learning_rate": 4.533537019806844e-07, - "loss": 0.5765, + "epoch": 0.59, + "grad_norm": 2.1814327725729994, + "learning_rate": 3.876034763422455e-06, + "loss": 0.4788, "step": 8244 }, { - "epoch": 0.87, - "grad_norm": 2.680356743825882, - "learning_rate": 4.52644871328658e-07, - "loss": 0.6421, + "epoch": 0.59, + "grad_norm": 2.787002400771939, + "learning_rate": 3.874915037321617e-06, + "loss": 0.4721, "step": 8245 }, { - "epoch": 0.87, - "grad_norm": 1.0197526172225977, - "learning_rate": 4.5193656897520534e-07, - "loss": 0.5282, + "epoch": 0.59, + "grad_norm": 2.4381711661193552, + "learning_rate": 3.873795370651576e-06, + "loss": 0.4593, "step": 8246 }, { - "epoch": 0.87, - "grad_norm": 2.456701500945603, - "learning_rate": 4.5122879500261396e-07, - "loss": 0.5945, + "epoch": 0.59, + "grad_norm": 1.6609968205895653, + "learning_rate": 3.872675763471483e-06, + "loss": 0.5128, "step": 8247 }, { - "epoch": 0.87, - "grad_norm": 5.368043410381294, - "learning_rate": 4.50521549493112e-07, - "loss": 0.6154, + "epoch": 0.59, + "grad_norm": 1.7655872912203199, + "learning_rate": 3.871556215840475e-06, + "loss": 0.4876, "step": 8248 }, { - "epoch": 0.87, - "grad_norm": 4.207890875247668, - "learning_rate": 4.498148325288665e-07, - "loss": 0.6075, + "epoch": 0.59, + "grad_norm": 1.7096255553307074, + "learning_rate": 3.870436727817691e-06, + "loss": 0.5302, "step": 8249 }, { - "epoch": 0.87, - "grad_norm": 2.2518464473290987, - "learning_rate": 4.491086441919801e-07, - "loss": 0.6067, + "epoch": 0.59, + "grad_norm": 1.6639111687193957, + "learning_rate": 3.869317299462267e-06, + "loss": 0.5435, "step": 8250 }, { - "epoch": 0.87, - "grad_norm": 2.2804388518048135, - "learning_rate": 4.484029845644955e-07, - "loss": 0.6258, + "epoch": 0.59, + "grad_norm": 1.6776739576699145, + "learning_rate": 3.868197930833334e-06, + "loss": 0.52, "step": 8251 }, { - "epoch": 0.87, - "grad_norm": 2.0120465903949816, - "learning_rate": 4.4769785372839493e-07, - "loss": 0.533, + "epoch": 0.59, + "grad_norm": 2.0742309930945333, + "learning_rate": 3.867078621990022e-06, + "loss": 0.5005, "step": 8252 }, { - "epoch": 0.87, - "grad_norm": 2.7135204524309904, - "learning_rate": 4.469932517655978e-07, - "loss": 0.5617, + "epoch": 0.59, + "grad_norm": 1.643471147806347, + "learning_rate": 3.865959372991457e-06, + "loss": 0.5101, "step": 8253 }, { - "epoch": 0.87, - "grad_norm": 2.9167866927413715, - "learning_rate": 4.462891787579654e-07, - "loss": 0.6386, + "epoch": 0.59, + "grad_norm": 0.695603715354903, + "learning_rate": 3.864840183896758e-06, + "loss": 0.4364, "step": 8254 }, { - "epoch": 0.87, - "grad_norm": 2.0215478723122295, - "learning_rate": 4.4558563478729113e-07, - "loss": 0.5989, + "epoch": 0.59, + "grad_norm": 2.0435943324578845, + "learning_rate": 3.863721054765048e-06, + "loss": 0.5438, "step": 8255 }, { - "epoch": 0.87, - "grad_norm": 0.910071063555147, - "learning_rate": 4.4488261993531233e-07, - "loss": 0.5422, + "epoch": 0.59, + "grad_norm": 1.482878781930355, + "learning_rate": 3.862601985655442e-06, + "loss": 0.4802, "step": 8256 }, { - "epoch": 0.87, - "grad_norm": 11.796143039706289, - "learning_rate": 4.441801342837027e-07, - "loss": 0.502, + "epoch": 0.59, + "grad_norm": 1.604015070653445, + "learning_rate": 3.8614829766270545e-06, + "loss": 0.5162, "step": 8257 }, { - "epoch": 0.87, - "grad_norm": 2.342733719631572, - "learning_rate": 4.4347817791407677e-07, - "loss": 0.567, + "epoch": 0.59, + "grad_norm": 1.734114959345256, + "learning_rate": 3.860364027738992e-06, + "loss": 0.5097, "step": 8258 }, { - "epoch": 0.87, - "grad_norm": 4.3267678983826565, - "learning_rate": 4.4277675090798445e-07, - "loss": 0.5816, + "epoch": 0.59, + "grad_norm": 0.7437754843269132, + "learning_rate": 3.859245139050364e-06, + "loss": 0.4449, "step": 8259 }, { - "epoch": 0.87, - "grad_norm": 3.157334595406685, - "learning_rate": 4.4207585334691493e-07, - "loss": 0.5783, + "epoch": 0.59, + "grad_norm": 1.770670914863751, + "learning_rate": 3.858126310620274e-06, + "loss": 0.5197, "step": 8260 }, { - "epoch": 0.87, - "grad_norm": 4.343720857313059, - "learning_rate": 4.41375485312297e-07, - "loss": 0.5595, + "epoch": 0.59, + "grad_norm": 1.661363983153688, + "learning_rate": 3.857007542507822e-06, + "loss": 0.5458, "step": 8261 }, { - "epoch": 0.87, - "grad_norm": 2.109175745260641, - "learning_rate": 4.406756468854989e-07, - "loss": 0.5158, + "epoch": 0.59, + "grad_norm": 1.6369956665577876, + "learning_rate": 3.855888834772102e-06, + "loss": 0.5036, "step": 8262 }, { - "epoch": 0.87, - "grad_norm": 2.3196868345115775, - "learning_rate": 4.3997633814782393e-07, - "loss": 0.5602, + "epoch": 0.59, + "grad_norm": 1.836689503021629, + "learning_rate": 3.854770187472213e-06, + "loss": 0.53, "step": 8263 }, { - "epoch": 0.87, - "grad_norm": 2.7303164750460387, - "learning_rate": 4.392775591805154e-07, - "loss": 0.5038, + "epoch": 0.59, + "grad_norm": 1.5140100630273194, + "learning_rate": 3.8536516006672415e-06, + "loss": 0.539, "step": 8264 }, { - "epoch": 0.87, - "grad_norm": 2.74241779032744, - "learning_rate": 4.385793100647567e-07, - "loss": 0.6497, + "epoch": 0.59, + "grad_norm": 1.6029930742422214, + "learning_rate": 3.852533074416277e-06, + "loss": 0.5154, "step": 8265 }, { - "epoch": 0.87, - "grad_norm": 2.430609319723213, - "learning_rate": 4.378815908816675e-07, - "loss": 0.5677, + "epoch": 0.59, + "grad_norm": 0.7061318499354393, + "learning_rate": 3.8514146087784035e-06, + "loss": 0.4316, "step": 8266 }, { - "epoch": 0.87, - "grad_norm": 2.7793948165517435, - "learning_rate": 4.371844017123095e-07, - "loss": 0.5426, + "epoch": 0.59, + "grad_norm": 1.5168484657272634, + "learning_rate": 3.850296203812704e-06, + "loss": 0.5231, "step": 8267 }, { - "epoch": 0.87, - "grad_norm": 3.4548513277509145, - "learning_rate": 4.3648774263767624e-07, - "loss": 0.5881, + "epoch": 0.59, + "grad_norm": 2.001980604059304, + "learning_rate": 3.849177859578254e-06, + "loss": 0.5258, "step": 8268 }, { - "epoch": 0.87, - "grad_norm": 2.2805067121865363, - "learning_rate": 4.3579161373870526e-07, - "loss": 0.6534, + "epoch": 0.59, + "grad_norm": 1.8790081532355276, + "learning_rate": 3.848059576134128e-06, + "loss": 0.6054, "step": 8269 }, { - "epoch": 0.87, - "grad_norm": 2.521977095171458, - "learning_rate": 4.350960150962702e-07, - "loss": 0.5821, + "epoch": 0.59, + "grad_norm": 2.0142043803420604, + "learning_rate": 3.8469413535394e-06, + "loss": 0.5118, "step": 8270 }, { - "epoch": 0.87, - "grad_norm": 3.045332855637481, - "learning_rate": 4.344009467911858e-07, - "loss": 0.5343, + "epoch": 0.59, + "grad_norm": 2.496804308002027, + "learning_rate": 3.8458231918531354e-06, + "loss": 0.4693, "step": 8271 }, { - "epoch": 0.87, - "grad_norm": 3.071925089409981, - "learning_rate": 4.3370640890420145e-07, - "loss": 0.5714, + "epoch": 0.59, + "grad_norm": 1.5535138433768152, + "learning_rate": 3.844705091134401e-06, + "loss": 0.5256, "step": 8272 }, { - "epoch": 0.87, - "grad_norm": 2.3770918532643552, - "learning_rate": 4.3301240151600587e-07, - "loss": 0.6368, + "epoch": 0.59, + "grad_norm": 1.9083851334242, + "learning_rate": 3.8435870514422576e-06, + "loss": 0.5789, "step": 8273 }, { - "epoch": 0.87, - "grad_norm": 2.0377462716813706, - "learning_rate": 4.3231892470722736e-07, - "loss": 0.5848, + "epoch": 0.59, + "grad_norm": 2.513027750974792, + "learning_rate": 3.842469072835766e-06, + "loss": 0.559, "step": 8274 }, { - "epoch": 0.87, - "grad_norm": 2.3436477279860513, - "learning_rate": 4.316259785584337e-07, - "loss": 0.524, + "epoch": 0.59, + "grad_norm": 1.811181677163189, + "learning_rate": 3.841351155373981e-06, + "loss": 0.5381, "step": 8275 }, { - "epoch": 0.87, - "grad_norm": 2.7364690734810475, - "learning_rate": 4.309335631501277e-07, - "loss": 0.635, + "epoch": 0.59, + "grad_norm": 2.094791418287964, + "learning_rate": 3.840233299115954e-06, + "loss": 0.5158, "step": 8276 }, { - "epoch": 0.87, - "grad_norm": 2.5489141180850075, - "learning_rate": 4.3024167856275166e-07, - "loss": 0.5364, + "epoch": 0.59, + "grad_norm": 1.6680064740146152, + "learning_rate": 3.839115504120731e-06, + "loss": 0.5369, "step": 8277 }, { - "epoch": 0.87, - "grad_norm": 3.3087010035902105, - "learning_rate": 4.2955032487668745e-07, - "loss": 0.5727, + "epoch": 0.59, + "grad_norm": 1.5629202866297183, + "learning_rate": 3.8379977704473635e-06, + "loss": 0.4823, "step": 8278 }, { - "epoch": 0.87, - "grad_norm": 2.3875544612935014, - "learning_rate": 4.2885950217225525e-07, - "loss": 0.573, + "epoch": 0.59, + "grad_norm": 1.5497150034102116, + "learning_rate": 3.836880098154889e-06, + "loss": 0.4512, "step": 8279 }, { - "epoch": 0.87, - "grad_norm": 0.997641790782197, - "learning_rate": 4.281692105297125e-07, - "loss": 0.5424, + "epoch": 0.59, + "grad_norm": 1.8742229183471364, + "learning_rate": 3.835762487302349e-06, + "loss": 0.5399, "step": 8280 }, { - "epoch": 0.87, - "grad_norm": 3.5825231781643305, - "learning_rate": 4.2747945002925507e-07, - "loss": 0.604, + "epoch": 0.59, + "grad_norm": 1.4218655138175456, + "learning_rate": 3.8346449379487815e-06, + "loss": 0.5031, "step": 8281 }, { - "epoch": 0.87, - "grad_norm": 2.751380772733463, - "learning_rate": 4.267902207510166e-07, - "loss": 0.6387, + "epoch": 0.59, + "grad_norm": 1.6887971374776853, + "learning_rate": 3.8335274501532165e-06, + "loss": 0.5506, "step": 8282 }, { - "epoch": 0.87, - "grad_norm": 2.7858717540005844, - "learning_rate": 4.261015227750709e-07, - "loss": 0.5677, + "epoch": 0.59, + "grad_norm": 1.6947331197886903, + "learning_rate": 3.832410023974685e-06, + "loss": 0.5136, "step": 8283 }, { - "epoch": 0.87, - "grad_norm": 1.001147795122434, - "learning_rate": 4.254133561814289e-07, - "loss": 0.5422, + "epoch": 0.59, + "grad_norm": 2.319672675951931, + "learning_rate": 3.831292659472209e-06, + "loss": 0.5602, "step": 8284 }, { - "epoch": 0.87, - "grad_norm": 2.4257602264136264, - "learning_rate": 4.247257210500394e-07, - "loss": 0.5679, + "epoch": 0.59, + "grad_norm": 2.5012915003054204, + "learning_rate": 3.830175356704818e-06, + "loss": 0.5159, "step": 8285 }, { - "epoch": 0.87, - "grad_norm": 0.98287549752436, - "learning_rate": 4.240386174607891e-07, - "loss": 0.5719, + "epoch": 0.59, + "grad_norm": 1.451820205677297, + "learning_rate": 3.829058115731527e-06, + "loss": 0.5224, "step": 8286 }, { - "epoch": 0.87, - "grad_norm": 2.0556012748341397, - "learning_rate": 4.2335204549350415e-07, - "loss": 0.5993, + "epoch": 0.59, + "grad_norm": 1.515511510614305, + "learning_rate": 3.827940936611355e-06, + "loss": 0.5352, "step": 8287 }, { - "epoch": 0.87, - "grad_norm": 2.1184763996018456, - "learning_rate": 4.226660052279491e-07, - "loss": 0.5858, + "epoch": 0.59, + "grad_norm": 1.9916843541890212, + "learning_rate": 3.826823819403313e-06, + "loss": 0.5184, "step": 8288 }, { - "epoch": 0.87, - "grad_norm": 2.4200580318072165, - "learning_rate": 4.219804967438279e-07, - "loss": 0.5608, + "epoch": 0.59, + "grad_norm": 3.0479672820660455, + "learning_rate": 3.825706764166413e-06, + "loss": 0.5348, "step": 8289 }, { - "epoch": 0.87, - "grad_norm": 2.2140173131580885, - "learning_rate": 4.2129552012077636e-07, - "loss": 0.577, + "epoch": 0.59, + "grad_norm": 2.006308031447736, + "learning_rate": 3.824589770959661e-06, + "loss": 0.5571, "step": 8290 }, { - "epoch": 0.87, - "grad_norm": 2.8022432950471288, - "learning_rate": 4.2061107543837633e-07, - "loss": 0.5129, + "epoch": 0.59, + "grad_norm": 1.7815859679696167, + "learning_rate": 3.823472839842061e-06, + "loss": 0.5478, "step": 8291 }, { - "epoch": 0.87, - "grad_norm": 2.3001424430333746, - "learning_rate": 4.1992716277614365e-07, - "loss": 0.5429, + "epoch": 0.59, + "grad_norm": 2.019881105360346, + "learning_rate": 3.82235597087261e-06, + "loss": 0.6519, "step": 8292 }, { - "epoch": 0.87, - "grad_norm": 2.0628372093439378, - "learning_rate": 4.1924378221353425e-07, - "loss": 0.5649, + "epoch": 0.59, + "grad_norm": 1.7803367682114086, + "learning_rate": 3.821239164110308e-06, + "loss": 0.5178, "step": 8293 }, { - "epoch": 0.87, - "grad_norm": 3.0938803545878013, - "learning_rate": 4.185609338299407e-07, - "loss": 0.6533, + "epoch": 0.59, + "grad_norm": 2.343508459924527, + "learning_rate": 3.820122419614147e-06, + "loss": 0.5985, "step": 8294 }, { - "epoch": 0.87, - "grad_norm": 3.2890771263913123, - "learning_rate": 4.178786177046934e-07, - "loss": 0.633, + "epoch": 0.59, + "grad_norm": 1.4659168781332659, + "learning_rate": 3.819005737443117e-06, + "loss": 0.4786, "step": 8295 }, { - "epoch": 0.87, - "grad_norm": 2.1087154492100217, - "learning_rate": 4.1719683391706235e-07, - "loss": 0.5208, + "epoch": 0.59, + "grad_norm": 1.9765844948470523, + "learning_rate": 3.817889117656208e-06, + "loss": 0.5904, "step": 8296 }, { - "epoch": 0.87, - "grad_norm": 3.20130117340025, - "learning_rate": 4.165155825462569e-07, - "loss": 0.66, + "epoch": 0.59, + "grad_norm": 2.0761835756220006, + "learning_rate": 3.816772560312401e-06, + "loss": 0.5514, "step": 8297 }, { - "epoch": 0.87, - "grad_norm": 2.38259009410489, - "learning_rate": 4.158348636714216e-07, - "loss": 0.698, + "epoch": 0.59, + "grad_norm": 2.0785028048004603, + "learning_rate": 3.815656065470677e-06, + "loss": 0.4667, "step": 8298 }, { - "epoch": 0.87, - "grad_norm": 3.955775034898537, - "learning_rate": 4.151546773716392e-07, - "loss": 0.6124, + "epoch": 0.59, + "grad_norm": 1.8197232023299037, + "learning_rate": 3.8145396331900104e-06, + "loss": 0.5123, "step": 8299 }, { - "epoch": 0.87, - "grad_norm": 2.598068006883137, - "learning_rate": 4.1447502372593316e-07, - "loss": 0.5196, + "epoch": 0.59, + "grad_norm": 1.8381055884609208, + "learning_rate": 3.813423263529379e-06, + "loss": 0.5883, "step": 8300 }, { - "epoch": 0.87, - "grad_norm": 2.1646914007025493, - "learning_rate": 4.137959028132632e-07, - "loss": 0.631, + "epoch": 0.59, + "grad_norm": 1.6552915090780767, + "learning_rate": 3.812306956547751e-06, + "loss": 0.5016, "step": 8301 }, { - "epoch": 0.87, - "grad_norm": 2.3381901988980673, - "learning_rate": 4.1311731471253e-07, - "loss": 0.5472, + "epoch": 0.59, + "grad_norm": 1.4162336012327497, + "learning_rate": 3.811190712304095e-06, + "loss": 0.502, "step": 8302 }, { - "epoch": 0.87, - "grad_norm": 4.41287496079573, - "learning_rate": 4.1243925950256616e-07, - "loss": 0.6113, + "epoch": 0.59, + "grad_norm": 0.697693525212653, + "learning_rate": 3.8100745308573713e-06, + "loss": 0.4542, "step": 8303 }, { - "epoch": 0.87, - "grad_norm": 2.709280007140201, - "learning_rate": 4.117617372621474e-07, - "loss": 0.5998, + "epoch": 0.59, + "grad_norm": 1.7718962937211127, + "learning_rate": 3.8089584122665454e-06, + "loss": 0.5464, "step": 8304 }, { - "epoch": 0.87, - "grad_norm": 2.2689051954483386, - "learning_rate": 4.11084748069987e-07, - "loss": 0.6169, + "epoch": 0.59, + "grad_norm": 1.5841583450388192, + "learning_rate": 3.807842356590572e-06, + "loss": 0.5071, "step": 8305 }, { - "epoch": 0.87, - "grad_norm": 2.3500449701909503, - "learning_rate": 4.1040829200473643e-07, - "loss": 0.6554, + "epoch": 0.59, + "grad_norm": 1.9668260452289006, + "learning_rate": 3.8067263638884045e-06, + "loss": 0.5248, "step": 8306 }, { - "epoch": 0.87, - "grad_norm": 2.4784819446542996, - "learning_rate": 4.0973236914498284e-07, - "loss": 0.6293, + "epoch": 0.59, + "grad_norm": 1.7406083781196189, + "learning_rate": 3.805610434218992e-06, + "loss": 0.5199, "step": 8307 }, { - "epoch": 0.87, - "grad_norm": 2.513599880145634, - "learning_rate": 4.090569795692528e-07, - "loss": 0.5934, + "epoch": 0.59, + "grad_norm": 1.6849128273347758, + "learning_rate": 3.804494567641284e-06, + "loss": 0.5718, "step": 8308 }, { - "epoch": 0.87, - "grad_norm": 2.8994935140490363, - "learning_rate": 4.08382123356012e-07, - "loss": 0.6651, + "epoch": 0.59, + "grad_norm": 1.9790809425837808, + "learning_rate": 3.8033787642142256e-06, + "loss": 0.5441, "step": 8309 }, { - "epoch": 0.87, - "grad_norm": 2.715503190088068, - "learning_rate": 4.077078005836638e-07, - "loss": 0.5931, + "epoch": 0.59, + "grad_norm": 1.5270833507466557, + "learning_rate": 3.8022630239967534e-06, + "loss": 0.5503, "step": 8310 }, { - "epoch": 0.87, - "grad_norm": 2.8979413858053675, - "learning_rate": 4.070340113305482e-07, - "loss": 0.6289, + "epoch": 0.59, + "grad_norm": 0.7942538612346829, + "learning_rate": 3.801147347047809e-06, + "loss": 0.4397, "step": 8311 }, { - "epoch": 0.87, - "grad_norm": 2.316723946930344, - "learning_rate": 4.0636075567494384e-07, - "loss": 0.6424, + "epoch": 0.59, + "grad_norm": 0.7047998047507246, + "learning_rate": 3.800031733426324e-06, + "loss": 0.432, "step": 8312 }, { - "epoch": 0.87, - "grad_norm": 2.6873395112200575, - "learning_rate": 4.056880336950675e-07, - "loss": 0.6267, + "epoch": 0.59, + "grad_norm": 0.7353598170369795, + "learning_rate": 3.798916183191228e-06, + "loss": 0.4342, "step": 8313 }, { - "epoch": 0.87, - "grad_norm": 3.2114274473336453, - "learning_rate": 4.05015845469075e-07, - "loss": 0.6438, + "epoch": 0.59, + "grad_norm": 1.8495971007829446, + "learning_rate": 3.797800696401448e-06, + "loss": 0.6421, "step": 8314 }, { - "epoch": 0.87, - "grad_norm": 2.1479506466214247, - "learning_rate": 4.043441910750595e-07, - "loss": 0.6171, + "epoch": 0.59, + "grad_norm": 1.7221839557091267, + "learning_rate": 3.79668527311591e-06, + "loss": 0.5598, "step": 8315 }, { - "epoch": 0.88, - "grad_norm": 7.941279814979407, - "learning_rate": 4.036730705910513e-07, - "loss": 0.6145, + "epoch": 0.59, + "grad_norm": 0.7253606851292389, + "learning_rate": 3.795569913393533e-06, + "loss": 0.4491, "step": 8316 }, { - "epoch": 0.88, - "grad_norm": 2.467418133816867, - "learning_rate": 4.030024840950181e-07, - "loss": 0.592, + "epoch": 0.59, + "grad_norm": 1.9582679962975282, + "learning_rate": 3.794454617293235e-06, + "loss": 0.5722, "step": 8317 }, { - "epoch": 0.88, - "grad_norm": 2.429232092167091, - "learning_rate": 4.0233243166486804e-07, - "loss": 0.5985, + "epoch": 0.59, + "grad_norm": 1.6890616922825243, + "learning_rate": 3.793339384873927e-06, + "loss": 0.5849, "step": 8318 }, { - "epoch": 0.88, - "grad_norm": 2.965427954109061, - "learning_rate": 4.016629133784461e-07, - "loss": 0.6265, + "epoch": 0.59, + "grad_norm": 1.641969971442546, + "learning_rate": 3.7922242161945233e-06, + "loss": 0.5221, "step": 8319 }, { - "epoch": 0.88, - "grad_norm": 2.148171593670399, - "learning_rate": 4.0099392931353454e-07, - "loss": 0.5878, + "epoch": 0.59, + "grad_norm": 1.6668779998482117, + "learning_rate": 3.7911091113139296e-06, + "loss": 0.5677, "step": 8320 }, { - "epoch": 0.88, - "grad_norm": 2.435832787059743, - "learning_rate": 4.0032547954785286e-07, - "loss": 0.6319, + "epoch": 0.59, + "grad_norm": 1.578741142094878, + "learning_rate": 3.7899940702910486e-06, + "loss": 0.4379, "step": 8321 }, { - "epoch": 0.88, - "grad_norm": 2.6890143714422567, - "learning_rate": 3.996575641590611e-07, - "loss": 0.5508, + "epoch": 0.59, + "grad_norm": 2.041813190927234, + "learning_rate": 3.7888790931847787e-06, + "loss": 0.4725, "step": 8322 }, { - "epoch": 0.88, - "grad_norm": 2.2991123215639337, - "learning_rate": 3.9899018322475503e-07, - "loss": 0.5276, + "epoch": 0.59, + "grad_norm": 1.449652314254948, + "learning_rate": 3.78776418005402e-06, + "loss": 0.4552, "step": 8323 }, { - "epoch": 0.88, - "grad_norm": 5.492027408747583, - "learning_rate": 3.983233368224709e-07, - "loss": 0.639, + "epoch": 0.59, + "grad_norm": 1.7819662784169477, + "learning_rate": 3.786649330957666e-06, + "loss": 0.5563, "step": 8324 }, { - "epoch": 0.88, - "grad_norm": 2.436217435669819, - "learning_rate": 3.9765702502967795e-07, - "loss": 0.5844, + "epoch": 0.59, + "grad_norm": 0.7161089929904882, + "learning_rate": 3.7855345459546056e-06, + "loss": 0.4256, "step": 8325 }, { - "epoch": 0.88, - "grad_norm": 2.2735941889345, - "learning_rate": 3.969912479237875e-07, - "loss": 0.5295, + "epoch": 0.59, + "grad_norm": 1.8054122418360026, + "learning_rate": 3.784419825103724e-06, + "loss": 0.4618, "step": 8326 }, { - "epoch": 0.88, - "grad_norm": 2.4581911213333942, - "learning_rate": 3.963260055821477e-07, - "loss": 0.5177, + "epoch": 0.59, + "grad_norm": 1.6116894756376345, + "learning_rate": 3.7833051684639076e-06, + "loss": 0.5044, "step": 8327 }, { - "epoch": 0.88, - "grad_norm": 2.4753926504162496, - "learning_rate": 3.9566129808204624e-07, - "loss": 0.6626, + "epoch": 0.59, + "grad_norm": 1.702375692153408, + "learning_rate": 3.782190576094036e-06, + "loss": 0.5599, "step": 8328 }, { - "epoch": 0.88, - "grad_norm": 2.324542564189604, - "learning_rate": 3.9499712550070513e-07, - "loss": 0.6273, + "epoch": 0.59, + "grad_norm": 1.6805549429488584, + "learning_rate": 3.781076048052981e-06, + "loss": 0.5632, "step": 8329 }, { - "epoch": 0.88, - "grad_norm": 3.14806628673995, - "learning_rate": 3.943334879152849e-07, - "loss": 0.6657, + "epoch": 0.59, + "grad_norm": 3.144650573310913, + "learning_rate": 3.7799615843996227e-06, + "loss": 0.5985, "step": 8330 }, { - "epoch": 0.88, - "grad_norm": 2.2230261640469764, - "learning_rate": 3.936703854028873e-07, - "loss": 0.5797, + "epoch": 0.59, + "grad_norm": 1.6181933139682563, + "learning_rate": 3.7788471851928267e-06, + "loss": 0.4989, "step": 8331 }, { - "epoch": 0.88, - "grad_norm": 2.3480123532810095, - "learning_rate": 3.9300781804054887e-07, - "loss": 0.5624, + "epoch": 0.59, + "grad_norm": 2.433780442993537, + "learning_rate": 3.77773285049146e-06, + "loss": 0.544, "step": 8332 }, { - "epoch": 0.88, - "grad_norm": 3.982281191390795, - "learning_rate": 3.9234578590524486e-07, - "loss": 0.4974, + "epoch": 0.59, + "grad_norm": 1.7597129717842288, + "learning_rate": 3.7766185803543846e-06, + "loss": 0.5181, "step": 8333 }, { - "epoch": 0.88, - "grad_norm": 2.5837995450453755, - "learning_rate": 3.9168428907388755e-07, - "loss": 0.6192, + "epoch": 0.59, + "grad_norm": 1.8865596169888008, + "learning_rate": 3.7755043748404634e-06, + "loss": 0.5611, "step": 8334 }, { - "epoch": 0.88, - "grad_norm": 2.346120506226917, - "learning_rate": 3.9102332762332775e-07, - "loss": 0.5529, + "epoch": 0.59, + "grad_norm": 2.0768283685765687, + "learning_rate": 3.7743902340085497e-06, + "loss": 0.5493, "step": 8335 }, { - "epoch": 0.88, - "grad_norm": 2.9988359051893787, - "learning_rate": 3.903629016303551e-07, - "loss": 0.6231, + "epoch": 0.59, + "grad_norm": 1.737426112080627, + "learning_rate": 3.773276157917496e-06, + "loss": 0.5497, "step": 8336 }, { - "epoch": 0.88, - "grad_norm": 2.929097624377366, - "learning_rate": 3.897030111716971e-07, - "loss": 0.5565, + "epoch": 0.59, + "grad_norm": 1.695639236947354, + "learning_rate": 3.7721621466261528e-06, + "loss": 0.4545, "step": 8337 }, { - "epoch": 0.88, - "grad_norm": 4.490972492353745, - "learning_rate": 3.890436563240141e-07, - "loss": 0.5473, + "epoch": 0.59, + "grad_norm": 2.215743902734911, + "learning_rate": 3.7710482001933644e-06, + "loss": 0.5225, "step": 8338 }, { - "epoch": 0.88, - "grad_norm": 6.123463476780392, - "learning_rate": 3.883848371639104e-07, - "loss": 0.6492, + "epoch": 0.59, + "grad_norm": 1.8474655630518426, + "learning_rate": 3.7699343186779765e-06, + "loss": 0.4775, "step": 8339 }, { - "epoch": 0.88, - "grad_norm": 2.1499837297094007, - "learning_rate": 3.8772655376792535e-07, - "loss": 0.5906, + "epoch": 0.59, + "grad_norm": 1.9957001625415416, + "learning_rate": 3.768820502138825e-06, + "loss": 0.532, "step": 8340 }, { - "epoch": 0.88, - "grad_norm": 2.667246941203882, - "learning_rate": 3.870688062125377e-07, - "loss": 0.6599, + "epoch": 0.59, + "grad_norm": 1.5092663717156172, + "learning_rate": 3.767706750634745e-06, + "loss": 0.5464, "step": 8341 }, { - "epoch": 0.88, - "grad_norm": 2.3272993650955405, - "learning_rate": 3.864115945741609e-07, - "loss": 0.601, + "epoch": 0.59, + "grad_norm": 1.7909071812188677, + "learning_rate": 3.7665930642245716e-06, + "loss": 0.5721, "step": 8342 }, { - "epoch": 0.88, - "grad_norm": 2.81091418829116, - "learning_rate": 3.8575491892914816e-07, - "loss": 0.6082, + "epoch": 0.59, + "grad_norm": 1.848160340738212, + "learning_rate": 3.7654794429671324e-06, + "loss": 0.5534, "step": 8343 }, { - "epoch": 0.88, - "grad_norm": 1.9611755295529303, - "learning_rate": 3.8509877935379083e-07, - "loss": 0.5979, + "epoch": 0.59, + "grad_norm": 1.696298428278636, + "learning_rate": 3.7643658869212496e-06, + "loss": 0.528, "step": 8344 }, { - "epoch": 0.88, - "grad_norm": 2.61950855812886, - "learning_rate": 3.8444317592431724e-07, - "loss": 0.6358, + "epoch": 0.59, + "grad_norm": 1.7882039768489122, + "learning_rate": 3.76325239614575e-06, + "loss": 0.4966, "step": 8345 }, { - "epoch": 0.88, - "grad_norm": 0.9406115494374441, - "learning_rate": 3.837881087168932e-07, - "loss": 0.4899, + "epoch": 0.59, + "grad_norm": 2.676168192679192, + "learning_rate": 3.762138970699448e-06, + "loss": 0.5945, "step": 8346 }, { - "epoch": 0.88, - "grad_norm": 2.38460628767397, - "learning_rate": 3.8313357780762227e-07, - "loss": 0.6332, + "epoch": 0.59, + "grad_norm": 1.5581946306006955, + "learning_rate": 3.761025610641161e-06, + "loss": 0.5474, "step": 8347 }, { - "epoch": 0.88, - "grad_norm": 2.7558226853136016, - "learning_rate": 3.8247958327254586e-07, - "loss": 0.5917, + "epoch": 0.59, + "grad_norm": 1.7175856013286779, + "learning_rate": 3.759912316029698e-06, + "loss": 0.6154, "step": 8348 }, { - "epoch": 0.88, - "grad_norm": 2.5358732790727547, - "learning_rate": 3.8182612518764374e-07, - "loss": 0.545, + "epoch": 0.59, + "grad_norm": 1.6065692917111996, + "learning_rate": 3.7587990869238705e-06, + "loss": 0.496, "step": 8349 }, { - "epoch": 0.88, - "grad_norm": 2.3595312762742418, - "learning_rate": 3.811732036288335e-07, - "loss": 0.608, + "epoch": 0.59, + "grad_norm": 3.3997262808894453, + "learning_rate": 3.757685923382481e-06, + "loss": 0.5626, "step": 8350 }, { - "epoch": 0.88, - "grad_norm": 2.7712731901406173, - "learning_rate": 3.805208186719689e-07, - "loss": 0.6207, + "epoch": 0.59, + "grad_norm": 1.6401445028857886, + "learning_rate": 3.7565728254643292e-06, + "loss": 0.5989, "step": 8351 }, { - "epoch": 0.88, - "grad_norm": 0.9142414884157021, - "learning_rate": 3.7986897039284043e-07, - "loss": 0.5117, + "epoch": 0.59, + "grad_norm": 0.7164778854079493, + "learning_rate": 3.755459793228214e-06, + "loss": 0.4465, "step": 8352 }, { - "epoch": 0.88, - "grad_norm": 2.316330933057498, - "learning_rate": 3.792176588671803e-07, - "loss": 0.6112, + "epoch": 0.59, + "grad_norm": 1.7868719421654695, + "learning_rate": 3.7543468267329296e-06, + "loss": 0.5282, "step": 8353 }, { - "epoch": 0.88, - "grad_norm": 2.9741114811382747, - "learning_rate": 3.785668841706558e-07, - "loss": 0.6341, + "epoch": 0.59, + "grad_norm": 1.8730120549597624, + "learning_rate": 3.753233926037268e-06, + "loss": 0.5141, "step": 8354 }, { - "epoch": 0.88, - "grad_norm": 2.0963466583939874, - "learning_rate": 3.7791664637887137e-07, - "loss": 0.5715, + "epoch": 0.59, + "grad_norm": 1.7329324824598293, + "learning_rate": 3.752121091200015e-06, + "loss": 0.5291, "step": 8355 }, { - "epoch": 0.88, - "grad_norm": 3.1371953655685396, - "learning_rate": 3.7726694556736943e-07, - "loss": 0.4608, + "epoch": 0.59, + "grad_norm": 1.7116749710912391, + "learning_rate": 3.751008322279952e-06, + "loss": 0.5094, "step": 8356 }, { - "epoch": 0.88, - "grad_norm": 2.2052960552688363, - "learning_rate": 3.7661778181163067e-07, - "loss": 0.5932, + "epoch": 0.59, + "grad_norm": 1.8907945600145537, + "learning_rate": 3.7498956193358636e-06, + "loss": 0.5119, "step": 8357 }, { - "epoch": 0.88, - "grad_norm": 2.7273903186451576, - "learning_rate": 3.759691551870737e-07, - "loss": 0.6266, + "epoch": 0.59, + "grad_norm": 1.8949801498358252, + "learning_rate": 3.7487829824265255e-06, + "loss": 0.5254, "step": 8358 }, { - "epoch": 0.88, - "grad_norm": 2.5969037148869796, - "learning_rate": 3.753210657690537e-07, - "loss": 0.5756, + "epoch": 0.59, + "grad_norm": 1.8236537947525284, + "learning_rate": 3.7476704116107066e-06, + "loss": 0.5509, "step": 8359 }, { - "epoch": 0.88, - "grad_norm": 2.5347492281922293, - "learning_rate": 3.746735136328633e-07, - "loss": 0.6715, + "epoch": 0.59, + "grad_norm": 2.024309841428457, + "learning_rate": 3.7465579069471813e-06, + "loss": 0.5624, "step": 8360 }, { - "epoch": 0.88, - "grad_norm": 2.8106795258984225, - "learning_rate": 3.740264988537329e-07, - "loss": 0.5104, + "epoch": 0.59, + "grad_norm": 1.8894171708903045, + "learning_rate": 3.7454454684947163e-06, + "loss": 0.6126, "step": 8361 }, { - "epoch": 0.88, - "grad_norm": 4.00381965510081, - "learning_rate": 3.7338002150683174e-07, - "loss": 0.6463, + "epoch": 0.59, + "grad_norm": 1.8874540845282881, + "learning_rate": 3.744333096312072e-06, + "loss": 0.4951, "step": 8362 }, { - "epoch": 0.88, - "grad_norm": 3.173783144313059, - "learning_rate": 3.727340816672664e-07, - "loss": 0.6332, + "epoch": 0.59, + "grad_norm": 1.9397125909755997, + "learning_rate": 3.7432207904580066e-06, + "loss": 0.5501, "step": 8363 }, { - "epoch": 0.88, - "grad_norm": 2.9002556770934373, - "learning_rate": 3.7208867941007974e-07, - "loss": 0.6431, + "epoch": 0.59, + "grad_norm": 2.028917642071207, + "learning_rate": 3.7421085509912797e-06, + "loss": 0.5372, "step": 8364 }, { - "epoch": 0.88, - "grad_norm": 2.7468354426920008, - "learning_rate": 3.7144381481025114e-07, - "loss": 0.5672, + "epoch": 0.59, + "grad_norm": 1.5973939438244427, + "learning_rate": 3.7409963779706415e-06, + "loss": 0.4823, "step": 8365 }, { - "epoch": 0.88, - "grad_norm": 2.4532097840078, - "learning_rate": 3.707994879427007e-07, - "loss": 0.6065, + "epoch": 0.59, + "grad_norm": 1.6936539483684092, + "learning_rate": 3.7398842714548395e-06, + "loss": 0.5207, "step": 8366 }, { - "epoch": 0.88, - "grad_norm": 2.664239854523862, - "learning_rate": 3.7015569888228464e-07, - "loss": 0.5785, + "epoch": 0.59, + "grad_norm": 1.5193164999190856, + "learning_rate": 3.73877223150262e-06, + "loss": 0.5846, "step": 8367 }, { - "epoch": 0.88, - "grad_norm": 3.1037887455483544, - "learning_rate": 3.6951244770379593e-07, - "loss": 0.5904, + "epoch": 0.59, + "grad_norm": 1.6545796634243624, + "learning_rate": 3.737660258172725e-06, + "loss": 0.5766, "step": 8368 }, { - "epoch": 0.88, - "grad_norm": 2.4768409598228636, - "learning_rate": 3.6886973448196475e-07, - "loss": 0.5523, + "epoch": 0.59, + "grad_norm": 0.7056881652354713, + "learning_rate": 3.7365483515238944e-06, + "loss": 0.4164, "step": 8369 }, { - "epoch": 0.88, - "grad_norm": 2.7756714612946563, - "learning_rate": 3.682275592914608e-07, - "loss": 0.6883, + "epoch": 0.59, + "grad_norm": 1.9885831089063595, + "learning_rate": 3.7354365116148594e-06, + "loss": 0.5702, "step": 8370 }, { - "epoch": 0.88, - "grad_norm": 0.9591826331531919, - "learning_rate": 3.675859222068895e-07, - "loss": 0.5301, + "epoch": 0.59, + "grad_norm": 0.7920317266708707, + "learning_rate": 3.734324738504352e-06, + "loss": 0.4556, "step": 8371 }, { - "epoch": 0.88, - "grad_norm": 0.9503987597108043, - "learning_rate": 3.669448233027967e-07, - "loss": 0.5432, + "epoch": 0.59, + "grad_norm": 1.4477074450110912, + "learning_rate": 3.7332130322511016e-06, + "loss": 0.533, "step": 8372 }, { - "epoch": 0.88, - "grad_norm": 2.2208818742361625, - "learning_rate": 3.6630426265366003e-07, - "loss": 0.5948, + "epoch": 0.59, + "grad_norm": 1.4846544533535329, + "learning_rate": 3.7321013929138327e-06, + "loss": 0.56, "step": 8373 }, { - "epoch": 0.88, - "grad_norm": 0.9397750897472155, - "learning_rate": 3.6566424033389947e-07, - "loss": 0.5438, + "epoch": 0.59, + "grad_norm": 1.6913307592120124, + "learning_rate": 3.7309898205512616e-06, + "loss": 0.5697, "step": 8374 }, { - "epoch": 0.88, - "grad_norm": 2.765113073654002, - "learning_rate": 3.6502475641787107e-07, - "loss": 0.6899, + "epoch": 0.59, + "grad_norm": 1.6753812224936273, + "learning_rate": 3.7298783152221093e-06, + "loss": 0.5388, "step": 8375 }, { - "epoch": 0.88, - "grad_norm": 2.389118275586379, - "learning_rate": 3.6438581097986867e-07, - "loss": 0.7196, + "epoch": 0.59, + "grad_norm": 1.9052895916415808, + "learning_rate": 3.7287668769850886e-06, + "loss": 0.5212, "step": 8376 }, { - "epoch": 0.88, - "grad_norm": 2.1693703595965332, - "learning_rate": 3.637474040941225e-07, - "loss": 0.5825, + "epoch": 0.59, + "grad_norm": 1.8127285585669985, + "learning_rate": 3.7276555058989097e-06, + "loss": 0.5912, "step": 8377 }, { - "epoch": 0.88, - "grad_norm": 3.064136002731287, - "learning_rate": 3.6310953583480024e-07, - "loss": 0.6088, + "epoch": 0.59, + "grad_norm": 1.7066600702084245, + "learning_rate": 3.7265442020222764e-06, + "loss": 0.5529, "step": 8378 }, { - "epoch": 0.88, - "grad_norm": 2.293347710145248, - "learning_rate": 3.6247220627600833e-07, - "loss": 0.6058, + "epoch": 0.59, + "grad_norm": 1.5739581009317187, + "learning_rate": 3.725432965413895e-06, + "loss": 0.4921, "step": 8379 }, { - "epoch": 0.88, - "grad_norm": 2.6363547175045237, - "learning_rate": 3.6183541549179025e-07, - "loss": 0.5608, + "epoch": 0.59, + "grad_norm": 1.705446443523998, + "learning_rate": 3.7243217961324628e-06, + "loss": 0.5053, "step": 8380 }, { - "epoch": 0.88, - "grad_norm": 2.2724907939257672, - "learning_rate": 3.6119916355612627e-07, - "loss": 0.5671, + "epoch": 0.59, + "grad_norm": 1.6674140281736634, + "learning_rate": 3.7232106942366752e-06, + "loss": 0.5113, "step": 8381 }, { - "epoch": 0.88, - "grad_norm": 3.166605602109891, - "learning_rate": 3.6056345054293283e-07, - "loss": 0.6426, + "epoch": 0.59, + "grad_norm": 1.6917089778598349, + "learning_rate": 3.7220996597852254e-06, + "loss": 0.5534, "step": 8382 }, { - "epoch": 0.88, - "grad_norm": 2.242029102578855, - "learning_rate": 3.59928276526067e-07, - "loss": 0.5707, + "epoch": 0.59, + "grad_norm": 1.7539293530247697, + "learning_rate": 3.7209886928368017e-06, + "loss": 0.5664, "step": 8383 }, { - "epoch": 0.88, - "grad_norm": 2.7693849737510656, - "learning_rate": 3.592936415793208e-07, - "loss": 0.6228, + "epoch": 0.59, + "grad_norm": 1.422622081894532, + "learning_rate": 3.71987779345009e-06, + "loss": 0.501, "step": 8384 }, { - "epoch": 0.88, - "grad_norm": 2.104105928630141, - "learning_rate": 3.586595457764247e-07, - "loss": 0.5438, + "epoch": 0.6, + "grad_norm": 2.087627391855198, + "learning_rate": 3.7187669616837705e-06, + "loss": 0.5134, "step": 8385 }, { - "epoch": 0.88, - "grad_norm": 2.368575714906391, - "learning_rate": 3.580259891910465e-07, - "loss": 0.6578, + "epoch": 0.6, + "grad_norm": 1.88797064324849, + "learning_rate": 3.7176561975965202e-06, + "loss": 0.4962, "step": 8386 }, { - "epoch": 0.88, - "grad_norm": 2.8815306378727747, - "learning_rate": 3.573929718967889e-07, - "loss": 0.6015, + "epoch": 0.6, + "grad_norm": 2.1788565232948103, + "learning_rate": 3.7165455012470165e-06, + "loss": 0.5312, "step": 8387 }, { - "epoch": 0.88, - "grad_norm": 2.2975996264972705, - "learning_rate": 3.567604939671959e-07, - "loss": 0.6111, + "epoch": 0.6, + "grad_norm": 2.058239975680285, + "learning_rate": 3.715434872693927e-06, + "loss": 0.5183, "step": 8388 }, { - "epoch": 0.88, - "grad_norm": 3.8232534667191658, - "learning_rate": 3.561285554757471e-07, - "loss": 0.567, + "epoch": 0.6, + "grad_norm": 1.7366744121824125, + "learning_rate": 3.7143243119959214e-06, + "loss": 0.4598, "step": 8389 }, { - "epoch": 0.88, - "grad_norm": 4.616180660257726, - "learning_rate": 3.554971564958587e-07, - "loss": 0.5183, + "epoch": 0.6, + "grad_norm": 2.419152513630357, + "learning_rate": 3.71321381921166e-06, + "loss": 0.499, "step": 8390 }, { - "epoch": 0.88, - "grad_norm": 5.4538629426171275, - "learning_rate": 3.548662971008837e-07, - "loss": 0.6621, + "epoch": 0.6, + "grad_norm": 1.639980460932389, + "learning_rate": 3.7121033943998074e-06, + "loss": 0.4867, "step": 8391 }, { - "epoch": 0.88, - "grad_norm": 2.688169563692595, - "learning_rate": 3.5423597736411463e-07, - "loss": 0.6208, + "epoch": 0.6, + "grad_norm": 1.5698582986150216, + "learning_rate": 3.7109930376190174e-06, + "loss": 0.4946, "step": 8392 }, { - "epoch": 0.88, - "grad_norm": 3.685828822376977, - "learning_rate": 3.536061973587812e-07, - "loss": 0.7017, + "epoch": 0.6, + "grad_norm": 2.119069914497332, + "learning_rate": 3.7098827489279406e-06, + "loss": 0.4961, "step": 8393 }, { - "epoch": 0.88, - "grad_norm": 3.36466383731313, - "learning_rate": 3.5297695715804825e-07, - "loss": 0.6273, + "epoch": 0.6, + "grad_norm": 2.039394087216505, + "learning_rate": 3.7087725283852304e-06, + "loss": 0.5588, "step": 8394 }, { - "epoch": 0.88, - "grad_norm": 7.105461376785732, - "learning_rate": 3.523482568350184e-07, - "loss": 0.5799, + "epoch": 0.6, + "grad_norm": 1.3881358199205418, + "learning_rate": 3.7076623760495307e-06, + "loss": 0.4585, "step": 8395 }, { - "epoch": 0.88, - "grad_norm": 3.2543324276401413, - "learning_rate": 3.517200964627332e-07, - "loss": 0.6232, + "epoch": 0.6, + "grad_norm": 1.9353655851044937, + "learning_rate": 3.7065522919794823e-06, + "loss": 0.5521, "step": 8396 }, { - "epoch": 0.88, - "grad_norm": 6.139641724312683, - "learning_rate": 3.510924761141704e-07, - "loss": 0.6816, + "epoch": 0.6, + "grad_norm": 2.6054465556511817, + "learning_rate": 3.705442276233725e-06, + "loss": 0.5329, "step": 8397 }, { - "epoch": 0.88, - "grad_norm": 3.1532567391885493, - "learning_rate": 3.504653958622456e-07, - "loss": 0.5811, + "epoch": 0.6, + "grad_norm": 1.7647525978053709, + "learning_rate": 3.704332328870892e-06, + "loss": 0.5223, "step": 8398 }, { - "epoch": 0.88, - "grad_norm": 2.7533862384085492, - "learning_rate": 3.49838855779811e-07, - "loss": 0.5851, + "epoch": 0.6, + "grad_norm": 1.9539717477466036, + "learning_rate": 3.7032224499496184e-06, + "loss": 0.5607, "step": 8399 }, { - "epoch": 0.88, - "grad_norm": 2.2806278164148184, - "learning_rate": 3.492128559396552e-07, - "loss": 0.5411, + "epoch": 0.6, + "grad_norm": 0.6742280056343347, + "learning_rate": 3.7021126395285277e-06, + "loss": 0.4327, "step": 8400 }, { - "epoch": 0.88, - "grad_norm": 5.786894398505118, - "learning_rate": 3.485873964145053e-07, - "loss": 0.5874, + "epoch": 0.6, + "grad_norm": 0.7448518225024098, + "learning_rate": 3.701002897666244e-06, + "loss": 0.4272, "step": 8401 }, { - "epoch": 0.88, - "grad_norm": 2.223189773086896, - "learning_rate": 3.479624772770268e-07, - "loss": 0.6098, + "epoch": 0.6, + "grad_norm": 0.7267983605003951, + "learning_rate": 3.6998932244213908e-06, + "loss": 0.4098, "step": 8402 }, { - "epoch": 0.88, - "grad_norm": 2.559630254288751, - "learning_rate": 3.4733809859982037e-07, - "loss": 0.5536, + "epoch": 0.6, + "grad_norm": 2.08111215081883, + "learning_rate": 3.698783619852582e-06, + "loss": 0.5795, "step": 8403 }, { - "epoch": 0.88, - "grad_norm": 3.389108579984836, - "learning_rate": 3.46714260455423e-07, - "loss": 0.517, + "epoch": 0.6, + "grad_norm": 0.716560104457119, + "learning_rate": 3.697674084018432e-06, + "loss": 0.4383, "step": 8404 }, { - "epoch": 0.88, - "grad_norm": 2.5393552430851085, - "learning_rate": 3.460909629163117e-07, - "loss": 0.6811, + "epoch": 0.6, + "grad_norm": 1.891634625591287, + "learning_rate": 3.6965646169775483e-06, + "loss": 0.5401, "step": 8405 }, { - "epoch": 0.88, - "grad_norm": 2.7176090256610452, - "learning_rate": 3.4546820605489974e-07, - "loss": 0.5495, + "epoch": 0.6, + "grad_norm": 1.9008643322901941, + "learning_rate": 3.6954552187885394e-06, + "loss": 0.5309, "step": 8406 }, { - "epoch": 0.88, - "grad_norm": 2.11043021583628, - "learning_rate": 3.448459899435369e-07, - "loss": 0.6564, + "epoch": 0.6, + "grad_norm": 1.6939699912376316, + "learning_rate": 3.694345889510007e-06, + "loss": 0.5307, "step": 8407 }, { - "epoch": 0.88, - "grad_norm": 2.5053404923404297, - "learning_rate": 3.442243146545093e-07, - "loss": 0.5676, + "epoch": 0.6, + "grad_norm": 1.5917040169994547, + "learning_rate": 3.6932366292005463e-06, + "loss": 0.53, "step": 8408 }, { - "epoch": 0.88, - "grad_norm": 2.246167043873439, - "learning_rate": 3.436031802600426e-07, - "loss": 0.6413, + "epoch": 0.6, + "grad_norm": 0.6799297108008673, + "learning_rate": 3.6921274379187572e-06, + "loss": 0.4215, "step": 8409 }, { - "epoch": 0.88, - "grad_norm": 2.9518371356414472, - "learning_rate": 3.4298258683229836e-07, - "loss": 0.6431, + "epoch": 0.6, + "grad_norm": 1.6363206927568872, + "learning_rate": 3.691018315723228e-06, + "loss": 0.5149, "step": 8410 }, { - "epoch": 0.89, - "grad_norm": 2.5332175625722124, - "learning_rate": 3.423625344433756e-07, - "loss": 0.6081, + "epoch": 0.6, + "grad_norm": 2.761710309927765, + "learning_rate": 3.6899092626725462e-06, + "loss": 0.6145, "step": 8411 }, { - "epoch": 0.89, - "grad_norm": 0.9693277831137703, - "learning_rate": 3.417430231653096e-07, - "loss": 0.5281, + "epoch": 0.6, + "grad_norm": 2.3064860362171906, + "learning_rate": 3.6888002788252952e-06, + "loss": 0.5153, "step": 8412 }, { - "epoch": 0.89, - "grad_norm": 2.1510911382680074, - "learning_rate": 3.4112405307007266e-07, - "loss": 0.6343, + "epoch": 0.6, + "grad_norm": 1.6062453140056026, + "learning_rate": 3.687691364240059e-06, + "loss": 0.477, "step": 8413 }, { - "epoch": 0.89, - "grad_norm": 2.465031585748283, - "learning_rate": 3.4050562422957624e-07, - "loss": 0.5676, + "epoch": 0.6, + "grad_norm": 2.3072979958512025, + "learning_rate": 3.6865825189754113e-06, + "loss": 0.6136, "step": 8414 }, { - "epoch": 0.89, - "grad_norm": 2.664072247681827, - "learning_rate": 3.3988773671566777e-07, - "loss": 0.6398, + "epoch": 0.6, + "grad_norm": 1.384993033451164, + "learning_rate": 3.685473743089925e-06, + "loss": 0.5798, "step": 8415 }, { - "epoch": 0.89, - "grad_norm": 3.75888208434279, - "learning_rate": 3.3927039060013045e-07, - "loss": 0.6638, + "epoch": 0.6, + "grad_norm": 1.7314687943944427, + "learning_rate": 3.684365036642168e-06, + "loss": 0.5646, "step": 8416 }, { - "epoch": 0.89, - "grad_norm": 2.388691306338053, - "learning_rate": 3.3865358595468635e-07, - "loss": 0.5692, + "epoch": 0.6, + "grad_norm": 1.6076965723492733, + "learning_rate": 3.68325639969071e-06, + "loss": 0.4692, "step": 8417 }, { - "epoch": 0.89, - "grad_norm": 2.0916358970739397, - "learning_rate": 3.380373228509937e-07, - "loss": 0.5645, + "epoch": 0.6, + "grad_norm": 2.7114175178816224, + "learning_rate": 3.6821478322941095e-06, + "loss": 0.5557, "step": 8418 }, { - "epoch": 0.89, - "grad_norm": 2.745762404593912, - "learning_rate": 3.374216013606485e-07, - "loss": 0.5756, + "epoch": 0.6, + "grad_norm": 1.6995126318004241, + "learning_rate": 3.681039334510927e-06, + "loss": 0.5203, "step": 8419 }, { - "epoch": 0.89, - "grad_norm": 2.1892760583248885, - "learning_rate": 3.368064215551842e-07, - "loss": 0.6245, + "epoch": 0.6, + "grad_norm": 0.7066413086902869, + "learning_rate": 3.6799309063997142e-06, + "loss": 0.4268, "step": 8420 }, { - "epoch": 0.89, - "grad_norm": 2.857345470679988, - "learning_rate": 3.3619178350607016e-07, - "loss": 0.6549, + "epoch": 0.6, + "grad_norm": 0.7493155069207863, + "learning_rate": 3.6788225480190255e-06, + "loss": 0.4721, "step": 8421 }, { - "epoch": 0.89, - "grad_norm": 2.379512536999841, - "learning_rate": 3.355776872847122e-07, - "loss": 0.5277, + "epoch": 0.6, + "grad_norm": 1.5404377534119515, + "learning_rate": 3.6777142594274074e-06, + "loss": 0.5342, "step": 8422 }, { - "epoch": 0.89, - "grad_norm": 2.321362797949176, - "learning_rate": 3.3496413296245536e-07, - "loss": 0.6036, + "epoch": 0.6, + "grad_norm": 3.1484231457794656, + "learning_rate": 3.6766060406833997e-06, + "loss": 0.5329, "step": 8423 }, { - "epoch": 0.89, - "grad_norm": 2.245567667052969, - "learning_rate": 3.343511206105804e-07, - "loss": 0.5352, + "epoch": 0.6, + "grad_norm": 2.2211614934160675, + "learning_rate": 3.6754978918455475e-06, + "loss": 0.5766, "step": 8424 }, { - "epoch": 0.89, - "grad_norm": 2.045437582667061, - "learning_rate": 3.3373865030030536e-07, - "loss": 0.5313, + "epoch": 0.6, + "grad_norm": 1.7827922027899583, + "learning_rate": 3.6743898129723856e-06, + "loss": 0.5292, "step": 8425 }, { - "epoch": 0.89, - "grad_norm": 2.440444633467484, - "learning_rate": 3.331267221027845e-07, - "loss": 0.602, + "epoch": 0.6, + "grad_norm": 1.9551897769164317, + "learning_rate": 3.673281804122444e-06, + "loss": 0.5716, "step": 8426 }, { - "epoch": 0.89, - "grad_norm": 0.9346701755245185, - "learning_rate": 3.325153360891109e-07, - "loss": 0.5728, + "epoch": 0.6, + "grad_norm": 1.640424045645493, + "learning_rate": 3.6721738653542518e-06, + "loss": 0.5679, "step": 8427 }, { - "epoch": 0.89, - "grad_norm": 0.964299003618511, - "learning_rate": 3.319044923303133e-07, - "loss": 0.5423, + "epoch": 0.6, + "grad_norm": 1.5303126687199309, + "learning_rate": 3.671065996726337e-06, + "loss": 0.5136, "step": 8428 }, { - "epoch": 0.89, - "grad_norm": 2.7005210221802867, - "learning_rate": 3.3129419089735825e-07, - "loss": 0.6404, + "epoch": 0.6, + "grad_norm": 1.7623890696198723, + "learning_rate": 3.66995819829722e-06, + "loss": 0.5366, "step": 8429 }, { - "epoch": 0.89, - "grad_norm": 2.6961944982240307, - "learning_rate": 3.306844318611474e-07, - "loss": 0.6103, + "epoch": 0.6, + "grad_norm": 1.4763085237360196, + "learning_rate": 3.668850470125417e-06, + "loss": 0.4984, "step": 8430 }, { - "epoch": 0.89, - "grad_norm": 2.9859468059962073, - "learning_rate": 3.300752152925213e-07, - "loss": 0.616, + "epoch": 0.6, + "grad_norm": 1.7007394984924344, + "learning_rate": 3.6677428122694415e-06, + "loss": 0.5736, "step": 8431 }, { - "epoch": 0.89, - "grad_norm": 2.782849535399388, - "learning_rate": 3.2946654126225776e-07, - "loss": 0.6134, + "epoch": 0.6, + "grad_norm": 1.7325666740469503, + "learning_rate": 3.6666352247878063e-06, + "loss": 0.5147, "step": 8432 }, { - "epoch": 0.89, - "grad_norm": 2.3605109038050074, - "learning_rate": 3.288584098410708e-07, - "loss": 0.578, + "epoch": 0.6, + "grad_norm": 1.7311144202354813, + "learning_rate": 3.665527707739016e-06, + "loss": 0.5637, "step": 8433 }, { - "epoch": 0.89, - "grad_norm": 3.0653052962653273, - "learning_rate": 3.282508210996105e-07, - "loss": 0.5938, + "epoch": 0.6, + "grad_norm": 1.8076587874058818, + "learning_rate": 3.6644202611815748e-06, + "loss": 0.5383, "step": 8434 }, { - "epoch": 0.89, - "grad_norm": 5.320847704691404, - "learning_rate": 3.276437751084649e-07, - "loss": 0.6857, + "epoch": 0.6, + "grad_norm": 1.6214910120211192, + "learning_rate": 3.6633128851739786e-06, + "loss": 0.5175, "step": 8435 }, { - "epoch": 0.89, - "grad_norm": 4.171177977533934, - "learning_rate": 3.270372719381587e-07, - "loss": 0.5834, + "epoch": 0.6, + "grad_norm": 1.6736829618330555, + "learning_rate": 3.662205579774728e-06, + "loss": 0.5186, "step": 8436 }, { - "epoch": 0.89, - "grad_norm": 7.886349099099324, - "learning_rate": 3.264313116591555e-07, - "loss": 0.6321, + "epoch": 0.6, + "grad_norm": 0.6418234745547892, + "learning_rate": 3.661098345042311e-06, + "loss": 0.4493, "step": 8437 }, { - "epoch": 0.89, - "grad_norm": 2.8574070263501765, - "learning_rate": 3.2582589434185184e-07, - "loss": 0.593, + "epoch": 0.6, + "grad_norm": 1.9413842531911274, + "learning_rate": 3.659991181035217e-06, + "loss": 0.4986, "step": 8438 }, { - "epoch": 0.89, - "grad_norm": 0.9494414704404982, - "learning_rate": 3.252210200565842e-07, - "loss": 0.5359, + "epoch": 0.6, + "grad_norm": 1.725408384594458, + "learning_rate": 3.658884087811927e-06, + "loss": 0.5232, "step": 8439 }, { - "epoch": 0.89, - "grad_norm": 3.1811163106053844, - "learning_rate": 3.2461668887362407e-07, - "loss": 0.6684, + "epoch": 0.6, + "grad_norm": 1.9657218231433107, + "learning_rate": 3.6577770654309243e-06, + "loss": 0.5422, "step": 8440 }, { - "epoch": 0.89, - "grad_norm": 2.381250101023003, - "learning_rate": 3.2401290086318315e-07, - "loss": 0.5573, + "epoch": 0.6, + "grad_norm": 1.767906565628731, + "learning_rate": 3.6566701139506865e-06, + "loss": 0.539, "step": 8441 }, { - "epoch": 0.89, - "grad_norm": 2.4346013172983683, - "learning_rate": 3.2340965609540643e-07, - "loss": 0.6883, + "epoch": 0.6, + "grad_norm": 2.0079875035507406, + "learning_rate": 3.655563233429683e-06, + "loss": 0.5316, "step": 8442 }, { - "epoch": 0.89, - "grad_norm": 2.3133127483274003, - "learning_rate": 3.228069546403767e-07, - "loss": 0.5346, + "epoch": 0.6, + "grad_norm": 1.539904462720375, + "learning_rate": 3.6544564239263867e-06, + "loss": 0.5269, "step": 8443 }, { - "epoch": 0.89, - "grad_norm": 4.184411233315387, - "learning_rate": 3.222047965681141e-07, - "loss": 0.5935, + "epoch": 0.6, + "grad_norm": 1.55200659282388, + "learning_rate": 3.6533496854992623e-06, + "loss": 0.5628, "step": 8444 }, { - "epoch": 0.89, - "grad_norm": 3.0278851604452073, - "learning_rate": 3.2160318194857655e-07, - "loss": 0.57, + "epoch": 0.6, + "grad_norm": 1.6830297008552193, + "learning_rate": 3.6522430182067704e-06, + "loss": 0.49, "step": 8445 }, { - "epoch": 0.89, - "grad_norm": 2.526714549043835, - "learning_rate": 3.210021108516581e-07, - "loss": 0.5528, + "epoch": 0.6, + "grad_norm": 1.8372938741710332, + "learning_rate": 3.6511364221073668e-06, + "loss": 0.52, "step": 8446 }, { - "epoch": 0.89, - "grad_norm": 7.168643812080329, - "learning_rate": 3.204015833471885e-07, - "loss": 0.5877, + "epoch": 0.6, + "grad_norm": 1.6127534925097204, + "learning_rate": 3.6500298972595107e-06, + "loss": 0.4968, "step": 8447 }, { - "epoch": 0.89, - "grad_norm": 2.058474171768321, - "learning_rate": 3.1980159950493526e-07, - "loss": 0.6024, + "epoch": 0.6, + "grad_norm": 0.6861653952254354, + "learning_rate": 3.6489234437216474e-06, + "loss": 0.4254, "step": 8448 }, { - "epoch": 0.89, - "grad_norm": 2.4688281862366876, - "learning_rate": 3.1920215939460263e-07, - "loss": 0.5965, + "epoch": 0.6, + "grad_norm": 1.609156694371853, + "learning_rate": 3.6478170615522278e-06, + "loss": 0.5012, "step": 8449 }, { - "epoch": 0.89, - "grad_norm": 5.718194185512959, - "learning_rate": 3.186032630858332e-07, - "loss": 0.5933, + "epoch": 0.6, + "grad_norm": 1.8850152859968103, + "learning_rate": 3.6467107508096906e-06, + "loss": 0.5555, "step": 8450 }, { - "epoch": 0.89, - "grad_norm": 2.4889377896135856, - "learning_rate": 3.180049106482047e-07, - "loss": 0.5516, + "epoch": 0.6, + "grad_norm": 2.551089139330252, + "learning_rate": 3.645604511552479e-06, + "loss": 0.5049, "step": 8451 }, { - "epoch": 0.89, - "grad_norm": 12.775983633424683, - "learning_rate": 3.1740710215122985e-07, - "loss": 0.5611, + "epoch": 0.6, + "grad_norm": 2.479701489216724, + "learning_rate": 3.644498343839027e-06, + "loss": 0.4981, "step": 8452 }, { - "epoch": 0.89, - "grad_norm": 2.2721038696367137, - "learning_rate": 3.1680983766436244e-07, - "loss": 0.6155, + "epoch": 0.6, + "grad_norm": 1.7036964393343232, + "learning_rate": 3.643392247727765e-06, + "loss": 0.5241, "step": 8453 }, { - "epoch": 0.89, - "grad_norm": 3.1139767872960693, - "learning_rate": 3.16213117256991e-07, - "loss": 0.5942, + "epoch": 0.6, + "grad_norm": 1.5354638264525533, + "learning_rate": 3.642286223277119e-06, + "loss": 0.4589, "step": 8454 }, { - "epoch": 0.89, - "grad_norm": 3.630434059537754, - "learning_rate": 3.1561694099843885e-07, - "loss": 0.6442, + "epoch": 0.6, + "grad_norm": 1.6714423346925884, + "learning_rate": 3.641180270545517e-06, + "loss": 0.4794, "step": 8455 }, { - "epoch": 0.89, - "grad_norm": 2.577929607649873, - "learning_rate": 3.1502130895797066e-07, - "loss": 0.6169, + "epoch": 0.6, + "grad_norm": 1.9452121077743023, + "learning_rate": 3.6400743895913775e-06, + "loss": 0.5469, "step": 8456 }, { - "epoch": 0.89, - "grad_norm": 2.8393452527215897, - "learning_rate": 3.144262212047833e-07, - "loss": 0.6314, + "epoch": 0.6, + "grad_norm": 1.7468475271660806, + "learning_rate": 3.6389685804731155e-06, + "loss": 0.5132, "step": 8457 }, { - "epoch": 0.89, - "grad_norm": 3.113366300161247, - "learning_rate": 3.138316778080125e-07, - "loss": 0.6043, + "epoch": 0.6, + "grad_norm": 1.878718162103846, + "learning_rate": 3.637862843249147e-06, + "loss": 0.5026, "step": 8458 }, { - "epoch": 0.89, - "grad_norm": 2.6875673979733223, - "learning_rate": 3.1323767883673193e-07, - "loss": 0.5829, + "epoch": 0.6, + "grad_norm": 1.9539154861311303, + "learning_rate": 3.636757177977878e-06, + "loss": 0.5254, "step": 8459 }, { - "epoch": 0.89, - "grad_norm": 2.46249726044324, - "learning_rate": 3.1264422435994977e-07, - "loss": 0.6048, + "epoch": 0.6, + "grad_norm": 1.5859930834860356, + "learning_rate": 3.635651584717715e-06, + "loss": 0.5189, "step": 8460 }, { - "epoch": 0.89, - "grad_norm": 2.3009561213241656, - "learning_rate": 3.120513144466109e-07, - "loss": 0.6559, + "epoch": 0.6, + "grad_norm": 1.9967485788384574, + "learning_rate": 3.6345460635270557e-06, + "loss": 0.5016, "step": 8461 }, { - "epoch": 0.89, - "grad_norm": 2.97192574369102, - "learning_rate": 3.114589491655989e-07, - "loss": 0.5194, + "epoch": 0.6, + "grad_norm": 0.7220680379633276, + "learning_rate": 3.633440614464302e-06, + "loss": 0.4159, "step": 8462 }, { - "epoch": 0.89, - "grad_norm": 0.9106568447644855, - "learning_rate": 3.1086712858573396e-07, - "loss": 0.546, + "epoch": 0.6, + "grad_norm": 2.164131922923717, + "learning_rate": 3.6323352375878453e-06, + "loss": 0.5565, "step": 8463 }, { - "epoch": 0.89, - "grad_norm": 2.8679076537404593, - "learning_rate": 3.10275852775771e-07, - "loss": 0.6296, + "epoch": 0.6, + "grad_norm": 1.6124963219654278, + "learning_rate": 3.6312299329560764e-06, + "loss": 0.4642, "step": 8464 }, { - "epoch": 0.89, - "grad_norm": 2.538688766180127, - "learning_rate": 3.0968512180440225e-07, - "loss": 0.5939, + "epoch": 0.6, + "grad_norm": 2.091709286107164, + "learning_rate": 3.630124700627378e-06, + "loss": 0.5019, "step": 8465 }, { - "epoch": 0.89, - "grad_norm": 2.185856983077277, - "learning_rate": 3.090949357402573e-07, - "loss": 0.5949, + "epoch": 0.6, + "grad_norm": 1.6241556638302501, + "learning_rate": 3.629019540660137e-06, + "loss": 0.5338, "step": 8466 }, { - "epoch": 0.89, - "grad_norm": 2.1121250006231236, - "learning_rate": 3.0850529465190295e-07, - "loss": 0.6447, + "epoch": 0.6, + "grad_norm": 1.5007975167723544, + "learning_rate": 3.6279144531127293e-06, + "loss": 0.5408, "step": 8467 }, { - "epoch": 0.89, - "grad_norm": 3.842408960010343, - "learning_rate": 3.079161986078427e-07, - "loss": 0.7068, + "epoch": 0.6, + "grad_norm": 1.754604726653088, + "learning_rate": 3.626809438043528e-06, + "loss": 0.5354, "step": 8468 }, { - "epoch": 0.89, - "grad_norm": 2.2491385619430364, - "learning_rate": 3.073276476765147e-07, - "loss": 0.6113, + "epoch": 0.6, + "grad_norm": 1.816691222621599, + "learning_rate": 3.6257044955109055e-06, + "loss": 0.5661, "step": 8469 }, { - "epoch": 0.89, - "grad_norm": 2.786879233199517, - "learning_rate": 3.0673964192629466e-07, - "loss": 0.5902, + "epoch": 0.6, + "grad_norm": 1.706938793068767, + "learning_rate": 3.6245996255732285e-06, + "loss": 0.5337, "step": 8470 }, { - "epoch": 0.89, - "grad_norm": 3.7520700236563482, - "learning_rate": 3.061521814254964e-07, - "loss": 0.6111, + "epoch": 0.6, + "grad_norm": 0.7720404020705618, + "learning_rate": 3.6234948282888615e-06, + "loss": 0.4589, "step": 8471 }, { - "epoch": 0.89, - "grad_norm": 3.037239255860453, - "learning_rate": 3.0556526624237025e-07, - "loss": 0.5664, + "epoch": 0.6, + "grad_norm": 1.685261718786503, + "learning_rate": 3.622390103716159e-06, + "loss": 0.5634, "step": 8472 }, { - "epoch": 0.89, - "grad_norm": 2.6847363520434486, - "learning_rate": 3.049788964451006e-07, - "loss": 0.6382, + "epoch": 0.6, + "grad_norm": 1.7895304556248426, + "learning_rate": 3.6212854519134812e-06, + "loss": 0.531, "step": 8473 }, { - "epoch": 0.89, - "grad_norm": 2.446565844450911, - "learning_rate": 3.043930721018107e-07, - "loss": 0.5745, + "epoch": 0.6, + "grad_norm": 1.8358878784059525, + "learning_rate": 3.6201808729391776e-06, + "loss": 0.514, "step": 8474 }, { - "epoch": 0.89, - "grad_norm": 2.727557086897107, - "learning_rate": 3.0380779328055945e-07, - "loss": 0.5672, + "epoch": 0.6, + "grad_norm": 1.7046440689926985, + "learning_rate": 3.619076366851596e-06, + "loss": 0.499, "step": 8475 }, { - "epoch": 0.89, - "grad_norm": 2.677310040784338, - "learning_rate": 3.0322306004934467e-07, - "loss": 0.6836, + "epoch": 0.6, + "grad_norm": 2.0540247521066135, + "learning_rate": 3.617971933709078e-06, + "loss": 0.5762, "step": 8476 }, { - "epoch": 0.89, - "grad_norm": 12.712228333925863, - "learning_rate": 3.026388724760976e-07, - "loss": 0.5878, + "epoch": 0.6, + "grad_norm": 2.097317807870029, + "learning_rate": 3.6168675735699664e-06, + "loss": 0.543, "step": 8477 }, { - "epoch": 0.89, - "grad_norm": 2.7959771086323078, - "learning_rate": 3.020552306286867e-07, - "loss": 0.5857, + "epoch": 0.6, + "grad_norm": 1.5410546606354638, + "learning_rate": 3.615763286492596e-06, + "loss": 0.476, "step": 8478 }, { - "epoch": 0.89, - "grad_norm": 2.3823323020029257, - "learning_rate": 3.0147213457491887e-07, - "loss": 0.6022, + "epoch": 0.6, + "grad_norm": 0.6841170620291218, + "learning_rate": 3.6146590725353003e-06, + "loss": 0.438, "step": 8479 }, { - "epoch": 0.89, - "grad_norm": 2.488707780332215, - "learning_rate": 3.0088958438253656e-07, - "loss": 0.5982, + "epoch": 0.6, + "grad_norm": 1.9975002288326282, + "learning_rate": 3.613554931756405e-06, + "loss": 0.5533, "step": 8480 }, { - "epoch": 0.89, - "grad_norm": 2.3382438488043396, - "learning_rate": 3.00307580119219e-07, - "loss": 0.543, + "epoch": 0.6, + "grad_norm": 1.7175342312444326, + "learning_rate": 3.6124508642142377e-06, + "loss": 0.4976, "step": 8481 }, { - "epoch": 0.89, - "grad_norm": 1.034614711972979, - "learning_rate": 2.9972612185258155e-07, - "loss": 0.5225, + "epoch": 0.6, + "grad_norm": 2.1019451448738455, + "learning_rate": 3.6113468699671174e-06, + "loss": 0.5386, "step": 8482 }, { - "epoch": 0.89, - "grad_norm": 3.30925472166408, - "learning_rate": 2.9914520965017515e-07, - "loss": 0.6756, + "epoch": 0.6, + "grad_norm": 1.8112712426137274, + "learning_rate": 3.61024294907336e-06, + "loss": 0.5583, "step": 8483 }, { - "epoch": 0.89, - "grad_norm": 1.0845217256514865, - "learning_rate": 2.985648435794897e-07, - "loss": 0.5391, + "epoch": 0.6, + "grad_norm": 1.6233361508356663, + "learning_rate": 3.6091391015912787e-06, + "loss": 0.4899, "step": 8484 }, { - "epoch": 0.89, - "grad_norm": 2.0947266192143617, - "learning_rate": 2.9798502370795123e-07, - "loss": 0.6038, + "epoch": 0.6, + "grad_norm": 1.7835638076941047, + "learning_rate": 3.6080353275791837e-06, + "loss": 0.4973, "step": 8485 }, { - "epoch": 0.89, - "grad_norm": 2.143425501436348, - "learning_rate": 2.974057501029204e-07, - "loss": 0.6399, + "epoch": 0.6, + "grad_norm": 1.8464443473124268, + "learning_rate": 3.606931627095379e-06, + "loss": 0.5419, "step": 8486 }, { - "epoch": 0.89, - "grad_norm": 2.9656804898570805, - "learning_rate": 2.968270228316944e-07, - "loss": 0.5797, + "epoch": 0.6, + "grad_norm": 1.6703401583103825, + "learning_rate": 3.605828000198165e-06, + "loss": 0.5133, "step": 8487 }, { - "epoch": 0.89, - "grad_norm": 3.1462792820258367, - "learning_rate": 2.9624884196151003e-07, - "loss": 0.689, + "epoch": 0.6, + "grad_norm": 1.7300195235556997, + "learning_rate": 3.604724446945842e-06, + "loss": 0.5835, "step": 8488 }, { - "epoch": 0.89, - "grad_norm": 2.3954574670266324, - "learning_rate": 2.956712075595386e-07, - "loss": 0.6849, + "epoch": 0.6, + "grad_norm": 1.7206951598385563, + "learning_rate": 3.6036209673967016e-06, + "loss": 0.478, "step": 8489 }, { - "epoch": 0.89, - "grad_norm": 2.4138440964653873, - "learning_rate": 2.950941196928869e-07, - "loss": 0.6271, + "epoch": 0.6, + "grad_norm": 1.798518877342212, + "learning_rate": 3.6025175616090335e-06, + "loss": 0.4517, "step": 8490 }, { - "epoch": 0.89, - "grad_norm": 2.0756281900786377, - "learning_rate": 2.945175784286003e-07, - "loss": 0.4975, + "epoch": 0.6, + "grad_norm": 1.7483194208158377, + "learning_rate": 3.6014142296411207e-06, + "loss": 0.5648, "step": 8491 }, { - "epoch": 0.89, - "grad_norm": 2.6184403922706543, - "learning_rate": 2.93941583833659e-07, - "loss": 0.6096, + "epoch": 0.6, + "grad_norm": 1.523055153038924, + "learning_rate": 3.6003109715512484e-06, + "loss": 0.461, "step": 8492 }, { - "epoch": 0.89, - "grad_norm": 3.0901832655249137, - "learning_rate": 2.933661359749801e-07, - "loss": 0.6738, + "epoch": 0.6, + "grad_norm": 1.6793477648911694, + "learning_rate": 3.599207787397695e-06, + "loss": 0.5214, "step": 8493 }, { - "epoch": 0.89, - "grad_norm": 3.0034179637882055, - "learning_rate": 2.9279123491941895e-07, - "loss": 0.5927, + "epoch": 0.6, + "grad_norm": 1.6538908914919823, + "learning_rate": 3.5981046772387322e-06, + "loss": 0.5179, "step": 8494 }, { - "epoch": 0.89, - "grad_norm": 2.4572881737070493, - "learning_rate": 2.9221688073376497e-07, - "loss": 0.5141, + "epoch": 0.6, + "grad_norm": 2.519327032008648, + "learning_rate": 3.5970016411326292e-06, + "loss": 0.4738, "step": 8495 }, { - "epoch": 0.89, - "grad_norm": 1.101626346769178, - "learning_rate": 2.916430734847442e-07, - "loss": 0.5403, + "epoch": 0.6, + "grad_norm": 0.7130394859550871, + "learning_rate": 3.595898679137655e-06, + "loss": 0.4313, "step": 8496 }, { - "epoch": 0.89, - "grad_norm": 2.641326366393812, - "learning_rate": 2.910698132390211e-07, - "loss": 0.5564, + "epoch": 0.6, + "grad_norm": 1.5260350057255188, + "learning_rate": 3.59479579131207e-06, + "loss": 0.5527, "step": 8497 }, { - "epoch": 0.89, - "grad_norm": 2.471920861357954, - "learning_rate": 2.904971000631951e-07, - "loss": 0.6577, + "epoch": 0.6, + "grad_norm": 1.6664911053351654, + "learning_rate": 3.593692977714133e-06, + "loss": 0.5272, "step": 8498 }, { - "epoch": 0.89, - "grad_norm": 2.4266070572352922, - "learning_rate": 2.899249340238025e-07, - "loss": 0.5911, + "epoch": 0.6, + "grad_norm": 1.496888040088009, + "learning_rate": 3.5925902384020968e-06, + "loss": 0.4815, "step": 8499 }, { - "epoch": 0.89, - "grad_norm": 2.536560963334578, - "learning_rate": 2.893533151873146e-07, - "loss": 0.5689, + "epoch": 0.6, + "grad_norm": 1.7383545468342498, + "learning_rate": 3.5914875734342136e-06, + "loss": 0.4434, "step": 8500 }, { - "epoch": 0.89, - "grad_norm": 2.6887191325980644, - "learning_rate": 2.887822436201415e-07, - "loss": 0.634, + "epoch": 0.6, + "grad_norm": 5.489685136666059, + "learning_rate": 3.5903849828687305e-06, + "loss": 0.4999, "step": 8501 }, { - "epoch": 0.89, - "grad_norm": 3.3999765388204106, - "learning_rate": 2.882117193886297e-07, - "loss": 0.5534, + "epoch": 0.6, + "grad_norm": 1.764116540152303, + "learning_rate": 3.5892824667638893e-06, + "loss": 0.5001, "step": 8502 }, { - "epoch": 0.89, - "grad_norm": 2.2917807653579576, - "learning_rate": 2.8764174255905886e-07, - "loss": 0.6365, + "epoch": 0.6, + "grad_norm": 1.8918923410379185, + "learning_rate": 3.5881800251779257e-06, + "loss": 0.5346, "step": 8503 }, { - "epoch": 0.89, - "grad_norm": 2.318274584316322, - "learning_rate": 2.870723131976494e-07, - "loss": 0.5574, + "epoch": 0.6, + "grad_norm": 1.957370527399626, + "learning_rate": 3.5870776581690796e-06, + "loss": 0.5316, "step": 8504 }, { - "epoch": 0.89, - "grad_norm": 4.890909130662085, - "learning_rate": 2.865034313705539e-07, - "loss": 0.5967, + "epoch": 0.6, + "grad_norm": 2.5442222836100274, + "learning_rate": 3.5859753657955795e-06, + "loss": 0.4745, "step": 8505 }, { - "epoch": 0.9, - "grad_norm": 2.0997713123306756, - "learning_rate": 2.8593509714386456e-07, - "loss": 0.5588, + "epoch": 0.6, + "grad_norm": 1.6413320543355392, + "learning_rate": 3.5848731481156494e-06, + "loss": 0.5524, "step": 8506 }, { - "epoch": 0.9, - "grad_norm": 2.617809589685534, - "learning_rate": 2.853673105836091e-07, - "loss": 0.5374, + "epoch": 0.6, + "grad_norm": 1.807254487198008, + "learning_rate": 3.583771005187516e-06, + "loss": 0.5704, "step": 8507 }, { - "epoch": 0.9, - "grad_norm": 2.68605129954623, - "learning_rate": 2.8480007175575144e-07, + "epoch": 0.6, + "grad_norm": 2.2078292157651527, + "learning_rate": 3.5826689370693977e-06, "loss": 0.5403, "step": 8508 }, { - "epoch": 0.9, - "grad_norm": 2.2348488550547074, - "learning_rate": 2.842333807261899e-07, - "loss": 0.575, + "epoch": 0.6, + "grad_norm": 1.9673840083137735, + "learning_rate": 3.581566943819509e-06, + "loss": 0.5564, "step": 8509 }, { - "epoch": 0.9, - "grad_norm": 2.701909082687133, - "learning_rate": 2.836672375607624e-07, - "loss": 0.5603, + "epoch": 0.6, + "grad_norm": 1.9633379145802055, + "learning_rate": 3.5804650254960584e-06, + "loss": 0.5163, "step": 8510 }, { - "epoch": 0.9, - "grad_norm": 2.982475752321344, - "learning_rate": 2.831016423252425e-07, - "loss": 0.5673, + "epoch": 0.6, + "grad_norm": 1.8161809746126951, + "learning_rate": 3.5793631821572572e-06, + "loss": 0.4922, "step": 8511 }, { - "epoch": 0.9, - "grad_norm": 2.9635176976288626, - "learning_rate": 2.825365950853387e-07, - "loss": 0.6621, + "epoch": 0.6, + "grad_norm": 1.7542027044361765, + "learning_rate": 3.5782614138613065e-06, + "loss": 0.5456, "step": 8512 }, { - "epoch": 0.9, - "grad_norm": 12.14753234673202, - "learning_rate": 2.8197209590669573e-07, - "loss": 0.5816, + "epoch": 0.6, + "grad_norm": 1.7504002409644621, + "learning_rate": 3.5771597206664043e-06, + "loss": 0.5623, "step": 8513 }, { - "epoch": 0.9, - "grad_norm": 2.4088842765153546, - "learning_rate": 2.814081448548961e-07, - "loss": 0.5515, + "epoch": 0.6, + "grad_norm": 1.7733740718895443, + "learning_rate": 3.576058102630747e-06, + "loss": 0.5053, "step": 8514 }, { - "epoch": 0.9, - "grad_norm": 2.5924726034885115, - "learning_rate": 2.8084474199545907e-07, - "loss": 0.6995, + "epoch": 0.6, + "grad_norm": 1.6836940624096886, + "learning_rate": 3.574956559812526e-06, + "loss": 0.497, "step": 8515 }, { - "epoch": 0.9, - "grad_norm": 2.319648207905272, - "learning_rate": 2.802818873938373e-07, - "loss": 0.5947, + "epoch": 0.6, + "grad_norm": 1.672679931234567, + "learning_rate": 3.573855092269929e-06, + "loss": 0.5656, "step": 8516 }, { - "epoch": 0.9, - "grad_norm": 2.9901729743477024, - "learning_rate": 2.79719581115423e-07, - "loss": 0.6025, + "epoch": 0.6, + "grad_norm": 1.5097036679768139, + "learning_rate": 3.572753700061139e-06, + "loss": 0.4998, "step": 8517 }, { - "epoch": 0.9, - "grad_norm": 2.1532085821221703, - "learning_rate": 2.7915782322554265e-07, - "loss": 0.574, + "epoch": 0.6, + "grad_norm": 1.835212990995145, + "learning_rate": 3.5716523832443315e-06, + "loss": 0.5445, "step": 8518 }, { - "epoch": 0.9, - "grad_norm": 2.386635632359815, - "learning_rate": 2.7859661378945966e-07, - "loss": 0.5769, + "epoch": 0.6, + "grad_norm": 2.2632335643866788, + "learning_rate": 3.570551141877688e-06, + "loss": 0.5128, "step": 8519 }, { - "epoch": 0.9, - "grad_norm": 2.495915367797323, - "learning_rate": 2.7803595287237416e-07, - "loss": 0.6033, + "epoch": 0.6, + "grad_norm": 2.077325256037344, + "learning_rate": 3.5694499760193756e-06, + "loss": 0.5678, "step": 8520 }, { - "epoch": 0.9, - "grad_norm": 2.2480787603907597, - "learning_rate": 2.7747584053942236e-07, - "loss": 0.5373, + "epoch": 0.6, + "grad_norm": 1.9563976654204593, + "learning_rate": 3.5683488857275627e-06, + "loss": 0.6069, "step": 8521 }, { - "epoch": 0.9, - "grad_norm": 2.755749524388031, - "learning_rate": 2.7691627685567545e-07, - "loss": 0.6319, + "epoch": 0.6, + "grad_norm": 1.656952269379425, + "learning_rate": 3.567247871060413e-06, + "loss": 0.4867, "step": 8522 }, { - "epoch": 0.9, - "grad_norm": 3.8563418049060942, - "learning_rate": 2.763572618861421e-07, - "loss": 0.6336, + "epoch": 0.6, + "grad_norm": 1.5234003657520876, + "learning_rate": 3.5661469320760865e-06, + "loss": 0.5149, "step": 8523 }, { - "epoch": 0.9, - "grad_norm": 0.9803488628787727, - "learning_rate": 2.7579879569576805e-07, - "loss": 0.5288, + "epoch": 0.6, + "grad_norm": 1.6767181931926602, + "learning_rate": 3.565046068832737e-06, + "loss": 0.4567, "step": 8524 }, { - "epoch": 0.9, - "grad_norm": 2.158913363300771, - "learning_rate": 2.7524087834943257e-07, - "loss": 0.5508, + "epoch": 0.6, + "grad_norm": 1.7659694529669896, + "learning_rate": 3.5639452813885155e-06, + "loss": 0.5, "step": 8525 }, { - "epoch": 0.9, - "grad_norm": 2.2990257664705704, - "learning_rate": 2.746835099119555e-07, - "loss": 0.5405, + "epoch": 0.61, + "grad_norm": 2.448629918223684, + "learning_rate": 3.562844569801571e-06, + "loss": 0.5178, "step": 8526 }, { - "epoch": 0.9, - "grad_norm": 2.722742879522637, - "learning_rate": 2.7412669044808714e-07, - "loss": 0.59, + "epoch": 0.61, + "grad_norm": 1.9984636895239503, + "learning_rate": 3.5617439341300476e-06, + "loss": 0.5152, "step": 8527 }, { - "epoch": 0.9, - "grad_norm": 2.7098085005125347, - "learning_rate": 2.7357042002251977e-07, - "loss": 0.6599, + "epoch": 0.61, + "grad_norm": 1.9593409164796303, + "learning_rate": 3.560643374432081e-06, + "loss": 0.5846, "step": 8528 }, { - "epoch": 0.9, - "grad_norm": 2.414604589485784, - "learning_rate": 2.730146986998783e-07, - "loss": 0.5914, + "epoch": 0.61, + "grad_norm": 1.5876671974302838, + "learning_rate": 3.559542890765809e-06, + "loss": 0.5277, "step": 8529 }, { - "epoch": 0.9, - "grad_norm": 2.9999875277016037, - "learning_rate": 2.7245952654472495e-07, - "loss": 0.6089, + "epoch": 0.61, + "grad_norm": 1.8568271110683356, + "learning_rate": 3.558442483189362e-06, + "loss": 0.5602, "step": 8530 }, { - "epoch": 0.9, - "grad_norm": 2.4698756738709617, - "learning_rate": 2.7190490362155706e-07, - "loss": 0.7083, + "epoch": 0.61, + "grad_norm": 1.5624427529907663, + "learning_rate": 3.5573421517608693e-06, + "loss": 0.5021, "step": 8531 }, { - "epoch": 0.9, - "grad_norm": 4.0071257148376604, - "learning_rate": 2.7135082999481033e-07, - "loss": 0.6586, + "epoch": 0.61, + "grad_norm": 0.7292797069158602, + "learning_rate": 3.556241896538452e-06, + "loss": 0.4195, "step": 8532 }, { - "epoch": 0.9, - "grad_norm": 2.4350296312759276, - "learning_rate": 2.707973057288554e-07, - "loss": 0.5523, + "epoch": 0.61, + "grad_norm": 1.5714879555592605, + "learning_rate": 3.5551417175802282e-06, + "loss": 0.5259, "step": 8533 }, { - "epoch": 0.9, - "grad_norm": 0.9785381511638156, - "learning_rate": 2.7024433088799874e-07, - "loss": 0.5391, + "epoch": 0.61, + "grad_norm": 1.6502885950638757, + "learning_rate": 3.554041614944316e-06, + "loss": 0.5091, "step": 8534 }, { - "epoch": 0.9, - "grad_norm": 3.7213298482636703, - "learning_rate": 2.696919055364827e-07, - "loss": 0.6217, + "epoch": 0.61, + "grad_norm": 1.7478440126676482, + "learning_rate": 3.5529415886888254e-06, + "loss": 0.5289, "step": 8535 }, { - "epoch": 0.9, - "grad_norm": 2.7426731105858804, - "learning_rate": 2.691400297384872e-07, - "loss": 0.622, + "epoch": 0.61, + "grad_norm": 1.919660465395001, + "learning_rate": 3.5518416388718625e-06, + "loss": 0.532, "step": 8536 }, { - "epoch": 0.9, - "grad_norm": 2.9682834230681583, - "learning_rate": 2.6858870355812807e-07, - "loss": 0.6019, + "epoch": 0.61, + "grad_norm": 1.9914490422011197, + "learning_rate": 3.550741765551532e-06, + "loss": 0.4698, "step": 8537 }, { - "epoch": 0.9, - "grad_norm": 2.5959681300857804, - "learning_rate": 2.6803792705945574e-07, - "loss": 0.5257, + "epoch": 0.61, + "grad_norm": 1.5637901791602202, + "learning_rate": 3.549641968785933e-06, + "loss": 0.5351, "step": 8538 }, { - "epoch": 0.9, - "grad_norm": 3.117284087962713, - "learning_rate": 2.674877003064591e-07, - "loss": 0.6142, + "epoch": 0.61, + "grad_norm": 0.7056925718841278, + "learning_rate": 3.5485422486331588e-06, + "loss": 0.4532, "step": 8539 }, { - "epoch": 0.9, - "grad_norm": 2.1014683352430614, - "learning_rate": 2.669380233630603e-07, - "loss": 0.6515, + "epoch": 0.61, + "grad_norm": 1.8681022230562094, + "learning_rate": 3.5474426051513e-06, + "loss": 0.5751, "step": 8540 }, { - "epoch": 0.9, - "grad_norm": 2.665551926914057, - "learning_rate": 2.6638889629311994e-07, - "loss": 0.5391, + "epoch": 0.61, + "grad_norm": 1.6096352872820463, + "learning_rate": 3.5463430383984467e-06, + "loss": 0.5749, "step": 8541 }, { - "epoch": 0.9, - "grad_norm": 3.222214668746664, - "learning_rate": 2.6584031916043476e-07, - "loss": 0.6326, + "epoch": 0.61, + "grad_norm": 1.7693014157040068, + "learning_rate": 3.5452435484326796e-06, + "loss": 0.5057, "step": 8542 }, { - "epoch": 0.9, - "grad_norm": 2.6557840721881734, - "learning_rate": 2.652922920287365e-07, - "loss": 0.557, + "epoch": 0.61, + "grad_norm": 1.763612851440349, + "learning_rate": 3.544144135312077e-06, + "loss": 0.4819, "step": 8543 }, { - "epoch": 0.9, - "grad_norm": 2.4193798372931403, - "learning_rate": 2.647448149616921e-07, - "loss": 0.5985, + "epoch": 0.61, + "grad_norm": 2.0328594430343587, + "learning_rate": 3.5430447990947133e-06, + "loss": 0.5218, "step": 8544 }, { - "epoch": 0.9, - "grad_norm": 4.689574105729332, - "learning_rate": 2.641978880229074e-07, - "loss": 0.608, + "epoch": 0.61, + "grad_norm": 1.792840819943376, + "learning_rate": 3.5419455398386613e-06, + "loss": 0.6028, "step": 8545 }, { - "epoch": 0.9, - "grad_norm": 2.743675378873042, - "learning_rate": 2.636515112759225e-07, - "loss": 0.5973, + "epoch": 0.61, + "grad_norm": 1.6136447860490117, + "learning_rate": 3.540846357601988e-06, + "loss": 0.5395, "step": 8546 }, { - "epoch": 0.9, - "grad_norm": 2.5656395322683725, - "learning_rate": 2.631056847842134e-07, - "loss": 0.5468, + "epoch": 0.61, + "grad_norm": 1.6895752755192506, + "learning_rate": 3.5397472524427535e-06, + "loss": 0.5606, "step": 8547 }, { - "epoch": 0.9, - "grad_norm": 3.1810462861004862, - "learning_rate": 2.625604086111927e-07, - "loss": 0.5399, + "epoch": 0.61, + "grad_norm": 0.7907280370390577, + "learning_rate": 3.5386482244190144e-06, + "loss": 0.4288, "step": 8548 }, { - "epoch": 0.9, - "grad_norm": 2.5217278501998113, - "learning_rate": 2.620156828202092e-07, - "loss": 0.6972, + "epoch": 0.61, + "grad_norm": 1.669234409083218, + "learning_rate": 3.5375492735888305e-06, + "loss": 0.5348, "step": 8549 }, { - "epoch": 0.9, - "grad_norm": 2.4534326162271425, - "learning_rate": 2.6147150747454776e-07, - "loss": 0.6776, + "epoch": 0.61, + "grad_norm": 2.1692310480009707, + "learning_rate": 3.5364504000102483e-06, + "loss": 0.4577, "step": 8550 }, { - "epoch": 0.9, - "grad_norm": 2.7071917479562653, - "learning_rate": 2.609278826374284e-07, - "loss": 0.6411, + "epoch": 0.61, + "grad_norm": 1.8768609492461321, + "learning_rate": 3.535351603741316e-06, + "loss": 0.4937, "step": 8551 }, { - "epoch": 0.9, - "grad_norm": 2.581422750056675, - "learning_rate": 2.6038480837200896e-07, - "loss": 0.6482, + "epoch": 0.61, + "grad_norm": 1.8979769644130036, + "learning_rate": 3.5342528848400737e-06, + "loss": 0.5822, "step": 8552 }, { - "epoch": 0.9, - "grad_norm": 2.335476941637362, - "learning_rate": 2.5984228474138115e-07, - "loss": 0.535, + "epoch": 0.61, + "grad_norm": 1.632010467502985, + "learning_rate": 3.5331542433645626e-06, + "loss": 0.4744, "step": 8553 }, { - "epoch": 0.9, - "grad_norm": 2.4914563871633013, - "learning_rate": 2.593003118085746e-07, - "loss": 0.539, + "epoch": 0.61, + "grad_norm": 2.090037254427941, + "learning_rate": 3.5320556793728146e-06, + "loss": 0.5301, "step": 8554 }, { - "epoch": 0.9, - "grad_norm": 2.477999239774418, - "learning_rate": 2.5875888963655396e-07, - "loss": 0.5921, + "epoch": 0.61, + "grad_norm": 1.5425418024801691, + "learning_rate": 3.530957192922857e-06, + "loss": 0.5233, "step": 8555 }, { - "epoch": 0.9, - "grad_norm": 3.0840194634488522, - "learning_rate": 2.582180182882205e-07, - "loss": 0.579, + "epoch": 0.61, + "grad_norm": 1.7091554760972223, + "learning_rate": 3.5298587840727206e-06, + "loss": 0.5807, "step": 8556 }, { - "epoch": 0.9, - "grad_norm": 2.206238807030622, - "learning_rate": 2.576776978264095e-07, - "loss": 0.6189, + "epoch": 0.61, + "grad_norm": 2.375482573237626, + "learning_rate": 3.5287604528804248e-06, + "loss": 0.5263, "step": 8557 }, { - "epoch": 0.9, - "grad_norm": 2.937734436218125, - "learning_rate": 2.5713792831389473e-07, - "loss": 0.6302, + "epoch": 0.61, + "grad_norm": 1.6296371668817176, + "learning_rate": 3.5276621994039863e-06, + "loss": 0.5681, "step": 8558 }, { - "epoch": 0.9, - "grad_norm": 2.714921870739376, - "learning_rate": 2.565987098133865e-07, - "loss": 0.597, + "epoch": 0.61, + "grad_norm": 1.740079077811567, + "learning_rate": 3.5265640237014177e-06, + "loss": 0.4979, "step": 8559 }, { - "epoch": 0.9, - "grad_norm": 3.5143184938458027, - "learning_rate": 2.56060042387527e-07, - "loss": 0.6705, + "epoch": 0.61, + "grad_norm": 1.7523085195136194, + "learning_rate": 3.525465925830732e-06, + "loss": 0.4819, "step": 8560 }, { - "epoch": 0.9, - "grad_norm": 2.617749300044612, - "learning_rate": 2.5552192609890004e-07, - "loss": 0.6626, + "epoch": 0.61, + "grad_norm": 1.8531724375930958, + "learning_rate": 3.5243679058499336e-06, + "loss": 0.5834, "step": 8561 }, { - "epoch": 0.9, - "grad_norm": 2.422359435875593, - "learning_rate": 2.5498436101001946e-07, - "loss": 0.5862, + "epoch": 0.61, + "grad_norm": 1.661884359110663, + "learning_rate": 3.523269963817022e-06, + "loss": 0.571, "step": 8562 }, { - "epoch": 0.9, - "grad_norm": 2.87556986453871, - "learning_rate": 2.544473471833403e-07, - "loss": 0.5421, + "epoch": 0.61, + "grad_norm": 1.6013581741907958, + "learning_rate": 3.522172099789993e-06, + "loss": 0.472, "step": 8563 }, { - "epoch": 0.9, - "grad_norm": 2.5521477189588757, - "learning_rate": 2.5391088468124934e-07, - "loss": 0.6127, + "epoch": 0.61, + "grad_norm": 1.736612761809169, + "learning_rate": 3.5210743138268426e-06, + "loss": 0.4776, "step": 8564 }, { - "epoch": 0.9, - "grad_norm": 2.626589882214039, - "learning_rate": 2.533749735660729e-07, - "loss": 0.6057, + "epoch": 0.61, + "grad_norm": 1.9600178962443644, + "learning_rate": 3.519976605985558e-06, + "loss": 0.5294, "step": 8565 }, { - "epoch": 0.9, - "grad_norm": 2.272089311491973, - "learning_rate": 2.528396139000705e-07, - "loss": 0.5462, + "epoch": 0.61, + "grad_norm": 1.8678277675344102, + "learning_rate": 3.518878976324124e-06, + "loss": 0.5202, "step": 8566 }, { - "epoch": 0.9, - "grad_norm": 2.497924231522892, - "learning_rate": 2.5230480574543914e-07, - "loss": 0.6173, + "epoch": 0.61, + "grad_norm": 2.0254992822402764, + "learning_rate": 3.5177814249005205e-06, + "loss": 0.5309, "step": 8567 }, { - "epoch": 0.9, - "grad_norm": 2.5946958140611014, - "learning_rate": 2.5177054916431186e-07, - "loss": 0.5676, + "epoch": 0.61, + "grad_norm": 2.1136746347464395, + "learning_rate": 3.5166839517727258e-06, + "loss": 0.4887, "step": 8568 }, { - "epoch": 0.9, - "grad_norm": 4.622437188040537, - "learning_rate": 2.5123684421875627e-07, - "loss": 0.611, + "epoch": 0.61, + "grad_norm": 1.6892594463048125, + "learning_rate": 3.5155865569987113e-06, + "loss": 0.488, "step": 8569 }, { - "epoch": 0.9, - "grad_norm": 2.4619075515236317, - "learning_rate": 2.507036909707766e-07, - "loss": 0.597, + "epoch": 0.61, + "grad_norm": 1.5930608923913296, + "learning_rate": 3.5144892406364423e-06, + "loss": 0.5857, "step": 8570 }, { - "epoch": 0.9, - "grad_norm": 2.130051490960946, - "learning_rate": 2.5017108948231284e-07, - "loss": 0.571, + "epoch": 0.61, + "grad_norm": 1.4945056491956892, + "learning_rate": 3.5133920027438873e-06, + "loss": 0.4749, "step": 8571 }, { - "epoch": 0.9, - "grad_norm": 2.3145413732517084, - "learning_rate": 2.4963903981524265e-07, - "loss": 0.6216, + "epoch": 0.61, + "grad_norm": 1.941993021952384, + "learning_rate": 3.5122948433790035e-06, + "loss": 0.5301, "step": 8572 }, { - "epoch": 0.9, - "grad_norm": 2.6281633995236575, - "learning_rate": 2.4910754203137597e-07, - "loss": 0.5933, + "epoch": 0.61, + "grad_norm": 2.98198260853281, + "learning_rate": 3.5111977625997473e-06, + "loss": 0.5872, "step": 8573 }, { - "epoch": 0.9, - "grad_norm": 0.9962463423913068, - "learning_rate": 2.4857659619246246e-07, - "loss": 0.4788, + "epoch": 0.61, + "grad_norm": 2.218499233712483, + "learning_rate": 3.5101007604640685e-06, + "loss": 0.5514, "step": 8574 }, { - "epoch": 0.9, - "grad_norm": 3.509203947561383, - "learning_rate": 2.4804620236018376e-07, - "loss": 0.6344, + "epoch": 0.61, + "grad_norm": 6.061663801386839, + "learning_rate": 3.5090038370299185e-06, + "loss": 0.5662, "step": 8575 }, { - "epoch": 0.9, - "grad_norm": 2.3901552696802058, - "learning_rate": 2.475163605961617e-07, - "loss": 0.6128, + "epoch": 0.61, + "grad_norm": 1.5927614261550973, + "learning_rate": 3.5079069923552374e-06, + "loss": 0.5155, "step": 8576 }, { - "epoch": 0.9, - "grad_norm": 2.851401165089648, - "learning_rate": 2.4698707096195094e-07, - "loss": 0.5852, + "epoch": 0.61, + "grad_norm": 1.796613495643159, + "learning_rate": 3.506810226497965e-06, + "loss": 0.582, "step": 8577 }, { - "epoch": 0.9, - "grad_norm": 3.7359676704634306, - "learning_rate": 2.4645833351904235e-07, - "loss": 0.5583, + "epoch": 0.61, + "grad_norm": 1.699352637295036, + "learning_rate": 3.5057135395160335e-06, + "loss": 0.4833, "step": 8578 }, { - "epoch": 0.9, - "grad_norm": 2.636845767665651, - "learning_rate": 2.4593014832886344e-07, - "loss": 0.6295, + "epoch": 0.61, + "grad_norm": 1.465252888199619, + "learning_rate": 3.5046169314673788e-06, + "loss": 0.5163, "step": 8579 }, { - "epoch": 0.9, - "grad_norm": 4.772427246864486, - "learning_rate": 2.4540251545277726e-07, - "loss": 0.6169, + "epoch": 0.61, + "grad_norm": 1.8199480413031062, + "learning_rate": 3.503520402409924e-06, + "loss": 0.5283, "step": 8580 }, { - "epoch": 0.9, - "grad_norm": 2.3697955879151564, - "learning_rate": 2.448754349520832e-07, - "loss": 0.5962, + "epoch": 0.61, + "grad_norm": 1.9394787376893605, + "learning_rate": 3.502423952401593e-06, + "loss": 0.5201, "step": 8581 }, { - "epoch": 0.9, - "grad_norm": 2.7219924370672866, - "learning_rate": 2.4434890688801504e-07, - "loss": 0.6225, + "epoch": 0.61, + "grad_norm": 3.2842731981350988, + "learning_rate": 3.501327581500301e-06, + "loss": 0.5299, "step": 8582 }, { - "epoch": 0.9, - "grad_norm": 2.566350444650246, - "learning_rate": 2.4382293132174384e-07, - "loss": 0.5551, + "epoch": 0.61, + "grad_norm": 1.8416611124778581, + "learning_rate": 3.500231289763967e-06, + "loss": 0.5023, "step": 8583 }, { - "epoch": 0.9, - "grad_norm": 2.5372826204659056, - "learning_rate": 2.4329750831437514e-07, - "loss": 0.6557, + "epoch": 0.61, + "grad_norm": 1.709810411818584, + "learning_rate": 3.499135077250498e-06, + "loss": 0.5614, "step": 8584 }, { - "epoch": 0.9, - "grad_norm": 0.8353964126790068, - "learning_rate": 2.427726379269524e-07, - "loss": 0.5587, + "epoch": 0.61, + "grad_norm": 1.7739171010628096, + "learning_rate": 3.4980389440177975e-06, + "loss": 0.5679, "step": 8585 }, { - "epoch": 0.9, - "grad_norm": 2.1888874583305187, - "learning_rate": 2.422483202204523e-07, - "loss": 0.692, + "epoch": 0.61, + "grad_norm": 1.6873529955194646, + "learning_rate": 3.4969428901237717e-06, + "loss": 0.5345, "step": 8586 }, { - "epoch": 0.9, - "grad_norm": 2.4217812609046585, - "learning_rate": 2.417245552557901e-07, - "loss": 0.5887, + "epoch": 0.61, + "grad_norm": 1.6090567598692989, + "learning_rate": 3.495846915626314e-06, + "loss": 0.5732, "step": 8587 }, { - "epoch": 0.9, - "grad_norm": 3.31796627980692, - "learning_rate": 2.4120134309381315e-07, - "loss": 0.7178, + "epoch": 0.61, + "grad_norm": 1.7043479201062248, + "learning_rate": 3.494751020583321e-06, + "loss": 0.489, "step": 8588 }, { - "epoch": 0.9, - "grad_norm": 2.213415629562026, - "learning_rate": 2.406786837953079e-07, - "loss": 0.5923, + "epoch": 0.61, + "grad_norm": 1.683191299785852, + "learning_rate": 3.4936552050526763e-06, + "loss": 0.5725, "step": 8589 }, { - "epoch": 0.9, - "grad_norm": 0.941272484421341, - "learning_rate": 2.401565774209963e-07, - "loss": 0.5444, + "epoch": 0.61, + "grad_norm": 2.0456061144005218, + "learning_rate": 3.4925594690922703e-06, + "loss": 0.4688, "step": 8590 }, { - "epoch": 0.9, - "grad_norm": 3.0359025963461446, - "learning_rate": 2.396350240315337e-07, - "loss": 0.5498, + "epoch": 0.61, + "grad_norm": 1.7431043392552188, + "learning_rate": 3.4914638127599816e-06, + "loss": 0.4768, "step": 8591 }, { - "epoch": 0.9, - "grad_norm": 2.6306647881917153, - "learning_rate": 2.391140236875128e-07, - "loss": 0.6251, + "epoch": 0.61, + "grad_norm": 2.218100764236499, + "learning_rate": 3.490368236113686e-06, + "loss": 0.5787, "step": 8592 }, { - "epoch": 0.9, - "grad_norm": 2.6487766622684172, - "learning_rate": 2.3859357644946233e-07, - "loss": 0.6471, + "epoch": 0.61, + "grad_norm": 1.9616652585769936, + "learning_rate": 3.4892727392112522e-06, + "loss": 0.588, "step": 8593 }, { - "epoch": 0.9, - "grad_norm": 2.645864611230044, - "learning_rate": 2.3807368237784735e-07, - "loss": 0.5693, + "epoch": 0.61, + "grad_norm": 2.5495594797245347, + "learning_rate": 3.4881773221105543e-06, + "loss": 0.5726, "step": 8594 }, { - "epoch": 0.9, - "grad_norm": 2.212215219692308, - "learning_rate": 2.3755434153306555e-07, - "loss": 0.5178, + "epoch": 0.61, + "grad_norm": 2.0960319080208016, + "learning_rate": 3.487081984869452e-06, + "loss": 0.5695, "step": 8595 }, { - "epoch": 0.9, - "grad_norm": 2.5525173270216794, - "learning_rate": 2.370355539754543e-07, - "loss": 0.5043, + "epoch": 0.61, + "grad_norm": 1.5890544908066384, + "learning_rate": 3.485986727545807e-06, + "loss": 0.6049, "step": 8596 }, { - "epoch": 0.9, - "grad_norm": 2.973557160116561, - "learning_rate": 2.3651731976528314e-07, - "loss": 0.5824, + "epoch": 0.61, + "grad_norm": 1.7871155643250827, + "learning_rate": 3.4848915501974704e-06, + "loss": 0.4913, "step": 8597 }, { - "epoch": 0.9, - "grad_norm": 2.368358688574046, - "learning_rate": 2.3599963896276113e-07, - "loss": 0.6403, + "epoch": 0.61, + "grad_norm": 1.8860304806161405, + "learning_rate": 3.4837964528822986e-06, + "loss": 0.533, "step": 8598 }, { - "epoch": 0.9, - "grad_norm": 3.541437487586155, - "learning_rate": 2.354825116280285e-07, - "loss": 0.6646, + "epoch": 0.61, + "grad_norm": 1.995525144606837, + "learning_rate": 3.4827014356581356e-06, + "loss": 0.5443, "step": 8599 }, { - "epoch": 0.9, - "grad_norm": 2.6360131144611247, - "learning_rate": 2.3496593782116607e-07, - "loss": 0.5936, + "epoch": 0.61, + "grad_norm": 1.7909766442646002, + "learning_rate": 3.4816064985828225e-06, + "loss": 0.4908, "step": 8600 }, { - "epoch": 0.91, - "grad_norm": 2.4729013605037418, - "learning_rate": 2.3444991760218526e-07, - "loss": 0.6654, + "epoch": 0.61, + "grad_norm": 0.6517337367138254, + "learning_rate": 3.4805116417142006e-06, + "loss": 0.4267, "step": 8601 }, { - "epoch": 0.91, - "grad_norm": 2.4321075434936485, - "learning_rate": 2.3393445103103762e-07, - "loss": 0.6181, + "epoch": 0.61, + "grad_norm": 2.398158352388218, + "learning_rate": 3.4794168651101013e-06, + "loss": 0.545, "step": 8602 }, { - "epoch": 0.91, - "grad_norm": 2.557292183237137, - "learning_rate": 2.334195381676091e-07, - "loss": 0.6118, + "epoch": 0.61, + "grad_norm": 1.7285264595841063, + "learning_rate": 3.4783221688283573e-06, + "loss": 0.575, "step": 8603 }, { - "epoch": 0.91, - "grad_norm": 2.7334084120232323, - "learning_rate": 2.3290517907171962e-07, - "loss": 0.5789, + "epoch": 0.61, + "grad_norm": 1.7758076016397153, + "learning_rate": 3.4772275529267895e-06, + "loss": 0.4789, "step": 8604 }, { - "epoch": 0.91, - "grad_norm": 3.2292498265081755, - "learning_rate": 2.3239137380312526e-07, - "loss": 0.6254, + "epoch": 0.61, + "grad_norm": 1.797003567026006, + "learning_rate": 3.4761330174632257e-06, + "loss": 0.5128, "step": 8605 }, { - "epoch": 0.91, - "grad_norm": 2.2190579271215136, - "learning_rate": 2.3187812242151996e-07, - "loss": 0.6202, + "epoch": 0.61, + "grad_norm": 1.5512389643162565, + "learning_rate": 3.4750385624954784e-06, + "loss": 0.5028, "step": 8606 }, { - "epoch": 0.91, - "grad_norm": 2.6556837855127435, - "learning_rate": 2.3136542498653103e-07, - "loss": 0.5697, + "epoch": 0.61, + "grad_norm": 1.599609294590339, + "learning_rate": 3.473944188081362e-06, + "loss": 0.4465, "step": 8607 }, { - "epoch": 0.91, - "grad_norm": 2.923481136322459, - "learning_rate": 2.308532815577219e-07, - "loss": 0.5777, + "epoch": 0.61, + "grad_norm": 2.4101185672505796, + "learning_rate": 3.472849894278682e-06, + "loss": 0.5498, "step": 8608 }, { - "epoch": 0.91, - "grad_norm": 2.1026727157977447, - "learning_rate": 2.3034169219459336e-07, - "loss": 0.5759, + "epoch": 0.61, + "grad_norm": 1.8038988271282983, + "learning_rate": 3.471755681145248e-06, + "loss": 0.5416, "step": 8609 }, { - "epoch": 0.91, - "grad_norm": 2.2566630684707336, - "learning_rate": 2.2983065695657835e-07, - "loss": 0.605, + "epoch": 0.61, + "grad_norm": 1.7638431731433624, + "learning_rate": 3.4706615487388558e-06, + "loss": 0.5209, "step": 8610 }, { - "epoch": 0.91, - "grad_norm": 3.0101849840783994, - "learning_rate": 2.2932017590304945e-07, - "loss": 0.6228, + "epoch": 0.61, + "grad_norm": 1.925043360975848, + "learning_rate": 3.469567497117304e-06, + "loss": 0.5223, "step": 8611 }, { - "epoch": 0.91, - "grad_norm": 2.9371051066532257, - "learning_rate": 2.2881024909331084e-07, - "loss": 0.5492, + "epoch": 0.61, + "grad_norm": 1.8156497932930622, + "learning_rate": 3.4684735263383806e-06, + "loss": 0.5206, "step": 8612 }, { - "epoch": 0.91, - "grad_norm": 3.594191038842626, - "learning_rate": 2.2830087658660626e-07, - "loss": 0.6286, + "epoch": 0.61, + "grad_norm": 1.813503821841933, + "learning_rate": 3.4673796364598765e-06, + "loss": 0.6143, "step": 8613 }, { - "epoch": 0.91, - "grad_norm": 2.2603072217979463, - "learning_rate": 2.2779205844211115e-07, - "loss": 0.704, + "epoch": 0.61, + "grad_norm": 1.5192525632239866, + "learning_rate": 3.466285827539574e-06, + "loss": 0.4692, "step": 8614 }, { - "epoch": 0.91, - "grad_norm": 0.8989627424235347, - "learning_rate": 2.2728379471893992e-07, - "loss": 0.4864, + "epoch": 0.61, + "grad_norm": 1.8270467887401127, + "learning_rate": 3.465192099635249e-06, + "loss": 0.5583, "step": 8615 }, { - "epoch": 0.91, - "grad_norm": 16.729220556087142, - "learning_rate": 2.2677608547614195e-07, - "loss": 0.5918, + "epoch": 0.61, + "grad_norm": 1.9076050656503434, + "learning_rate": 3.4640984528046795e-06, + "loss": 0.5507, "step": 8616 }, { - "epoch": 0.91, - "grad_norm": 2.2488176924383954, - "learning_rate": 2.2626893077269952e-07, - "loss": 0.582, + "epoch": 0.61, + "grad_norm": 1.6868215725175648, + "learning_rate": 3.4630048871056337e-06, + "loss": 0.4731, "step": 8617 }, { - "epoch": 0.91, - "grad_norm": 2.4475963095140343, - "learning_rate": 2.2576233066753328e-07, - "loss": 0.6073, + "epoch": 0.61, + "grad_norm": 2.7849614511369456, + "learning_rate": 3.4619114025958787e-06, + "loss": 0.5375, "step": 8618 }, { - "epoch": 0.91, - "grad_norm": 2.50017796036233, - "learning_rate": 2.252562852194984e-07, - "loss": 0.6705, + "epoch": 0.61, + "grad_norm": 1.5316625511303092, + "learning_rate": 3.4608179993331725e-06, + "loss": 0.5198, "step": 8619 }, { - "epoch": 0.91, - "grad_norm": 1.0355653284721114, - "learning_rate": 2.2475079448738667e-07, - "loss": 0.5797, + "epoch": 0.61, + "grad_norm": 1.706739027702324, + "learning_rate": 3.459724677375278e-06, + "loss": 0.5681, "step": 8620 }, { - "epoch": 0.91, - "grad_norm": 5.740952195511575, - "learning_rate": 2.2424585852992287e-07, - "loss": 0.6276, + "epoch": 0.61, + "grad_norm": 1.6227669831218552, + "learning_rate": 3.4586314367799445e-06, + "loss": 0.4637, "step": 8621 }, { - "epoch": 0.91, - "grad_norm": 2.624621427504004, - "learning_rate": 2.2374147740577058e-07, - "loss": 0.5743, + "epoch": 0.61, + "grad_norm": 1.9206403426201597, + "learning_rate": 3.4575382776049215e-06, + "loss": 0.5003, "step": 8622 }, { - "epoch": 0.91, - "grad_norm": 2.4340730079804325, - "learning_rate": 2.2323765117352625e-07, - "loss": 0.5585, + "epoch": 0.61, + "grad_norm": 1.5388343385147887, + "learning_rate": 3.4564451999079514e-06, + "loss": 0.4844, "step": 8623 }, { - "epoch": 0.91, - "grad_norm": 2.1286673679109693, - "learning_rate": 2.2273437989172308e-07, - "loss": 0.6019, + "epoch": 0.61, + "grad_norm": 1.9585093878232358, + "learning_rate": 3.455352203746778e-06, + "loss": 0.5458, "step": 8624 }, { - "epoch": 0.91, - "grad_norm": 4.029911691462066, - "learning_rate": 2.2223166361883096e-07, - "loss": 0.5091, + "epoch": 0.61, + "grad_norm": 1.647005637683755, + "learning_rate": 3.4542592891791336e-06, + "loss": 0.4669, "step": 8625 }, { - "epoch": 0.91, - "grad_norm": 3.1602463814705746, - "learning_rate": 2.217295024132532e-07, - "loss": 0.627, + "epoch": 0.61, + "grad_norm": 1.8003118173691572, + "learning_rate": 3.453166456262753e-06, + "loss": 0.5275, "step": 8626 }, { - "epoch": 0.91, - "grad_norm": 3.55085504048545, - "learning_rate": 2.2122789633332808e-07, - "loss": 0.6616, + "epoch": 0.61, + "grad_norm": 1.6796122718452906, + "learning_rate": 3.4520737050553597e-06, + "loss": 0.5815, "step": 8627 }, { - "epoch": 0.91, - "grad_norm": 3.3530586363040547, - "learning_rate": 2.2072684543733236e-07, - "loss": 0.6542, + "epoch": 0.61, + "grad_norm": 1.8511951503229302, + "learning_rate": 3.45098103561468e-06, + "loss": 0.4375, "step": 8628 }, { - "epoch": 0.91, - "grad_norm": 2.8871688456843403, - "learning_rate": 2.2022634978347668e-07, - "loss": 0.7099, + "epoch": 0.61, + "grad_norm": 2.0407871421057666, + "learning_rate": 3.449888447998431e-06, + "loss": 0.5277, "step": 8629 }, { - "epoch": 0.91, - "grad_norm": 2.5020918168833344, - "learning_rate": 2.197264094299062e-07, - "loss": 0.6461, + "epoch": 0.61, + "grad_norm": 1.682204350976052, + "learning_rate": 3.4487959422643258e-06, + "loss": 0.5119, "step": 8630 }, { - "epoch": 0.91, - "grad_norm": 2.9739866771027983, - "learning_rate": 2.192270244347039e-07, - "loss": 0.6444, + "epoch": 0.61, + "grad_norm": 1.6878932183954773, + "learning_rate": 3.447703518470075e-06, + "loss": 0.5613, "step": 8631 }, { - "epoch": 0.91, - "grad_norm": 2.453684270588019, - "learning_rate": 2.1872819485588504e-07, - "loss": 0.5701, + "epoch": 0.61, + "grad_norm": 2.042871195740435, + "learning_rate": 3.446611176673384e-06, + "loss": 0.515, "step": 8632 }, { - "epoch": 0.91, - "grad_norm": 2.9298917878750905, - "learning_rate": 2.1822992075140382e-07, - "loss": 0.6017, + "epoch": 0.61, + "grad_norm": 1.8148633910102985, + "learning_rate": 3.445518916931955e-06, + "loss": 0.43, "step": 8633 }, { - "epoch": 0.91, - "grad_norm": 4.21769668421678, - "learning_rate": 2.177322021791478e-07, - "loss": 0.552, + "epoch": 0.61, + "grad_norm": 1.4018971984286506, + "learning_rate": 3.4444267393034823e-06, + "loss": 0.5058, "step": 8634 }, { - "epoch": 0.91, - "grad_norm": 2.333398707424515, - "learning_rate": 2.1723503919694022e-07, - "loss": 0.5818, + "epoch": 0.61, + "grad_norm": 2.034117699742884, + "learning_rate": 3.4433346438456626e-06, + "loss": 0.5595, "step": 8635 }, { - "epoch": 0.91, - "grad_norm": 0.9600492185451742, - "learning_rate": 2.167384318625404e-07, - "loss": 0.5513, + "epoch": 0.61, + "grad_norm": 1.8971252949494033, + "learning_rate": 3.4422426306161823e-06, + "loss": 0.5885, "step": 8636 }, { - "epoch": 0.91, - "grad_norm": 2.1619655902215342, - "learning_rate": 2.1624238023364164e-07, - "loss": 0.6071, + "epoch": 0.61, + "grad_norm": 1.9524806164744102, + "learning_rate": 3.441150699672724e-06, + "loss": 0.5934, "step": 8637 }, { - "epoch": 0.91, - "grad_norm": 2.30009030782602, - "learning_rate": 2.1574688436787616e-07, - "loss": 0.5676, + "epoch": 0.61, + "grad_norm": 2.060682299676636, + "learning_rate": 3.4400588510729653e-06, + "loss": 0.6305, "step": 8638 }, { - "epoch": 0.91, - "grad_norm": 2.261626619968899, - "learning_rate": 2.152519443228074e-07, - "loss": 0.6164, + "epoch": 0.61, + "grad_norm": 1.613404989710232, + "learning_rate": 3.4389670848745853e-06, + "loss": 0.5218, "step": 8639 }, { - "epoch": 0.91, - "grad_norm": 2.5969295591844355, - "learning_rate": 2.1475756015593597e-07, - "loss": 0.577, + "epoch": 0.61, + "grad_norm": 1.6738325295174015, + "learning_rate": 3.4378754011352544e-06, + "loss": 0.5372, "step": 8640 }, { - "epoch": 0.91, - "grad_norm": 2.8285213912360896, - "learning_rate": 2.142637319246982e-07, - "loss": 0.5747, + "epoch": 0.61, + "grad_norm": 1.7116897543054015, + "learning_rate": 3.4367837999126387e-06, + "loss": 0.512, "step": 8641 }, { - "epoch": 0.91, - "grad_norm": 2.6663798524618434, - "learning_rate": 2.1377045968646648e-07, - "loss": 0.6167, + "epoch": 0.61, + "grad_norm": 1.609462177648127, + "learning_rate": 3.4356922812643965e-06, + "loss": 0.5032, "step": 8642 }, { - "epoch": 0.91, - "grad_norm": 3.875625734126703, - "learning_rate": 2.1327774349854669e-07, - "loss": 0.642, + "epoch": 0.61, + "grad_norm": 1.784940138181244, + "learning_rate": 3.4346008452481905e-06, + "loss": 0.5168, "step": 8643 }, { - "epoch": 0.91, - "grad_norm": 1.0023913927338757, - "learning_rate": 2.1278558341818245e-07, - "loss": 0.549, + "epoch": 0.61, + "grad_norm": 1.994258288383099, + "learning_rate": 3.4335094919216727e-06, + "loss": 0.5121, "step": 8644 }, { - "epoch": 0.91, - "grad_norm": 2.492821927184673, - "learning_rate": 2.1229397950254971e-07, - "loss": 0.6158, + "epoch": 0.61, + "grad_norm": 2.205454381757803, + "learning_rate": 3.4324182213424904e-06, + "loss": 0.5259, "step": 8645 }, { - "epoch": 0.91, - "grad_norm": 3.954417059877932, - "learning_rate": 2.1180293180876333e-07, - "loss": 0.5902, + "epoch": 0.61, + "grad_norm": 1.6637040572898558, + "learning_rate": 3.4313270335682903e-06, + "loss": 0.5143, "step": 8646 }, { - "epoch": 0.91, - "grad_norm": 2.4328009320824684, - "learning_rate": 2.11312440393871e-07, - "loss": 0.6403, + "epoch": 0.61, + "grad_norm": 1.9105361910812084, + "learning_rate": 3.4302359286567107e-06, + "loss": 0.4649, "step": 8647 }, { - "epoch": 0.91, - "grad_norm": 2.6647018614968547, - "learning_rate": 2.1082250531485658e-07, - "loss": 0.5715, + "epoch": 0.61, + "grad_norm": 1.7796593830722696, + "learning_rate": 3.4291449066653892e-06, + "loss": 0.5134, "step": 8648 }, { - "epoch": 0.91, - "grad_norm": 1.0060574812970433, - "learning_rate": 2.1033312662863902e-07, - "loss": 0.5627, + "epoch": 0.61, + "grad_norm": 3.518521295497625, + "learning_rate": 3.428053967651955e-06, + "loss": 0.4941, "step": 8649 }, { - "epoch": 0.91, - "grad_norm": 2.641443035192098, - "learning_rate": 2.0984430439207337e-07, - "loss": 0.5118, + "epoch": 0.61, + "grad_norm": 1.8614138367925748, + "learning_rate": 3.426963111674039e-06, + "loss": 0.5849, "step": 8650 }, { - "epoch": 0.91, - "grad_norm": 3.9361538566284255, - "learning_rate": 2.0935603866194975e-07, - "loss": 0.6679, + "epoch": 0.61, + "grad_norm": 0.698779158847941, + "learning_rate": 3.4258723387892606e-06, + "loss": 0.3946, "step": 8651 }, { - "epoch": 0.91, - "grad_norm": 2.39373289597829, - "learning_rate": 2.0886832949499337e-07, - "loss": 0.5668, + "epoch": 0.61, + "grad_norm": 1.6699290795618462, + "learning_rate": 3.4247816490552397e-06, + "loss": 0.4839, "step": 8652 }, { - "epoch": 0.91, - "grad_norm": 3.3861532699457686, - "learning_rate": 2.083811769478644e-07, - "loss": 0.6525, + "epoch": 0.61, + "grad_norm": 5.150985150635657, + "learning_rate": 3.4236910425295877e-06, + "loss": 0.547, "step": 8653 }, { - "epoch": 0.91, - "grad_norm": 3.936995441518683, - "learning_rate": 2.0789458107715876e-07, - "loss": 0.7025, + "epoch": 0.61, + "grad_norm": 4.153213924331854, + "learning_rate": 3.4226005192699176e-06, + "loss": 0.5473, "step": 8654 }, { - "epoch": 0.91, - "grad_norm": 2.349046763306768, - "learning_rate": 2.0740854193940896e-07, - "loss": 0.6309, + "epoch": 0.61, + "grad_norm": 1.932604065261497, + "learning_rate": 3.421510079333833e-06, + "loss": 0.6506, "step": 8655 }, { - "epoch": 0.91, - "grad_norm": 2.2596999928277253, - "learning_rate": 2.0692305959107982e-07, - "loss": 0.6011, + "epoch": 0.61, + "grad_norm": 1.9940902301764873, + "learning_rate": 3.4204197227789354e-06, + "loss": 0.5697, "step": 8656 }, { - "epoch": 0.91, - "grad_norm": 0.9228352521725202, - "learning_rate": 2.0643813408857516e-07, - "loss": 0.4917, + "epoch": 0.61, + "grad_norm": 2.4088539330779293, + "learning_rate": 3.419329449662818e-06, + "loss": 0.4976, "step": 8657 }, { - "epoch": 0.91, - "grad_norm": 2.6385919262613746, - "learning_rate": 2.05953765488231e-07, - "loss": 0.5755, + "epoch": 0.61, + "grad_norm": 1.582088661610243, + "learning_rate": 3.4182392600430774e-06, + "loss": 0.4251, "step": 8658 }, { - "epoch": 0.91, - "grad_norm": 2.2137952255821753, - "learning_rate": 2.0546995384632008e-07, - "loss": 0.6005, + "epoch": 0.61, + "grad_norm": 1.6874182691325592, + "learning_rate": 3.4171491539772987e-06, + "loss": 0.5435, "step": 8659 }, { - "epoch": 0.91, - "grad_norm": 2.5803538966845636, - "learning_rate": 2.0498669921905024e-07, - "loss": 0.7106, + "epoch": 0.61, + "grad_norm": 2.112689960500304, + "learning_rate": 3.416059131523064e-06, + "loss": 0.4929, "step": 8660 }, { - "epoch": 0.91, - "grad_norm": 2.9792364157241105, - "learning_rate": 2.045040016625649e-07, - "loss": 0.5886, + "epoch": 0.61, + "grad_norm": 1.8404314423311119, + "learning_rate": 3.4149691927379537e-06, + "loss": 0.5348, "step": 8661 }, { - "epoch": 0.91, - "grad_norm": 2.3666959842671464, - "learning_rate": 2.04021861232942e-07, - "loss": 0.5773, + "epoch": 0.61, + "grad_norm": 0.7351351634574725, + "learning_rate": 3.413879337679541e-06, + "loss": 0.421, "step": 8662 }, { - "epoch": 0.91, - "grad_norm": 3.6824064705468786, - "learning_rate": 2.0354027798619557e-07, - "loss": 0.6144, + "epoch": 0.61, + "grad_norm": 1.881815962240398, + "learning_rate": 3.4127895664053965e-06, + "loss": 0.5826, "step": 8663 }, { - "epoch": 0.91, - "grad_norm": 2.435875779491401, - "learning_rate": 2.030592519782748e-07, - "loss": 0.6403, + "epoch": 0.61, + "grad_norm": 2.8455034811340165, + "learning_rate": 3.4116998789730842e-06, + "loss": 0.5378, "step": 8664 }, { - "epoch": 0.91, - "grad_norm": 2.6128038565557805, - "learning_rate": 2.0257878326506386e-07, - "loss": 0.6337, + "epoch": 0.61, + "grad_norm": 2.3794682897095276, + "learning_rate": 3.4106102754401684e-06, + "loss": 0.5115, "step": 8665 }, { - "epoch": 0.91, - "grad_norm": 2.282841908523947, - "learning_rate": 2.020988719023814e-07, - "loss": 0.5942, + "epoch": 0.61, + "grad_norm": 1.811074004947556, + "learning_rate": 3.409520755864203e-06, + "loss": 0.5958, "step": 8666 }, { - "epoch": 0.91, - "grad_norm": 4.370234830147251, - "learning_rate": 2.0161951794598233e-07, - "loss": 0.5383, + "epoch": 0.62, + "grad_norm": 0.7093287021883683, + "learning_rate": 3.4084313203027397e-06, + "loss": 0.4473, "step": 8667 }, { - "epoch": 0.91, - "grad_norm": 2.1849633361416223, - "learning_rate": 2.011407214515576e-07, - "loss": 0.6604, + "epoch": 0.62, + "grad_norm": 1.648522436314926, + "learning_rate": 3.4073419688133267e-06, + "loss": 0.551, "step": 8668 }, { - "epoch": 0.91, - "grad_norm": 1.9975177789546006, - "learning_rate": 2.0066248247473108e-07, - "loss": 0.6303, + "epoch": 0.62, + "grad_norm": 0.64521368998871, + "learning_rate": 3.4062527014535075e-06, + "loss": 0.4293, "step": 8669 }, { - "epoch": 0.91, - "grad_norm": 2.522737370998732, - "learning_rate": 2.0018480107106496e-07, - "loss": 0.5688, + "epoch": 0.62, + "grad_norm": 1.792444529117091, + "learning_rate": 3.4051635182808224e-06, + "loss": 0.5477, "step": 8670 }, { - "epoch": 0.91, - "grad_norm": 2.4993421173578714, - "learning_rate": 1.9970767729605268e-07, - "loss": 0.5487, + "epoch": 0.62, + "grad_norm": 0.7402741843603029, + "learning_rate": 3.4040744193528043e-06, + "loss": 0.4381, "step": 8671 }, { - "epoch": 0.91, - "grad_norm": 3.527990417897828, - "learning_rate": 1.992311112051265e-07, - "loss": 0.5992, + "epoch": 0.62, + "grad_norm": 1.725039163527142, + "learning_rate": 3.4029854047269807e-06, + "loss": 0.5215, "step": 8672 }, { - "epoch": 0.91, - "grad_norm": 6.350490216687445, - "learning_rate": 1.9875510285365273e-07, - "loss": 0.5894, + "epoch": 0.62, + "grad_norm": 2.2198063120849194, + "learning_rate": 3.4018964744608818e-06, + "loss": 0.5937, "step": 8673 }, { - "epoch": 0.91, - "grad_norm": 2.5923844574515673, - "learning_rate": 1.9827965229693215e-07, - "loss": 0.6643, + "epoch": 0.62, + "grad_norm": 1.900822413372096, + "learning_rate": 3.400807628612026e-06, + "loss": 0.5652, "step": 8674 }, { - "epoch": 0.91, - "grad_norm": 2.8259634397156805, - "learning_rate": 1.978047595902005e-07, - "loss": 0.5955, + "epoch": 0.62, + "grad_norm": 1.8193036803983371, + "learning_rate": 3.3997188672379288e-06, + "loss": 0.5772, "step": 8675 }, { - "epoch": 0.91, - "grad_norm": 3.233787620175592, - "learning_rate": 1.973304247886304e-07, - "loss": 0.5663, + "epoch": 0.62, + "grad_norm": 1.9118938818426392, + "learning_rate": 3.3986301903961044e-06, + "loss": 0.5529, "step": 8676 }, { - "epoch": 0.91, - "grad_norm": 2.9852555325806147, - "learning_rate": 1.9685664794732884e-07, - "loss": 0.6144, + "epoch": 0.62, + "grad_norm": 1.6266251939683571, + "learning_rate": 3.397541598144059e-06, + "loss": 0.5689, "step": 8677 }, { - "epoch": 0.91, - "grad_norm": 4.705513555230461, - "learning_rate": 1.963834291213368e-07, - "loss": 0.5355, + "epoch": 0.62, + "grad_norm": 1.9911971014897505, + "learning_rate": 3.3964530905392973e-06, + "loss": 0.5332, "step": 8678 }, { - "epoch": 0.91, - "grad_norm": 5.025851027664062, - "learning_rate": 1.959107683656325e-07, - "loss": 0.5568, + "epoch": 0.62, + "grad_norm": 1.782663298535882, + "learning_rate": 3.395364667639317e-06, + "loss": 0.537, "step": 8679 }, { - "epoch": 0.91, - "grad_norm": 2.5826049637506676, - "learning_rate": 1.954386657351276e-07, - "loss": 0.6396, + "epoch": 0.62, + "grad_norm": 1.5432822311263883, + "learning_rate": 3.3942763295016102e-06, + "loss": 0.5644, "step": 8680 }, { - "epoch": 0.91, - "grad_norm": 3.650551552519513, - "learning_rate": 1.9496712128467043e-07, - "loss": 0.6257, + "epoch": 0.62, + "grad_norm": 2.1036298410776286, + "learning_rate": 3.39318807618367e-06, + "loss": 0.5114, "step": 8681 }, { - "epoch": 0.91, - "grad_norm": 3.075805993953122, - "learning_rate": 1.9449613506904275e-07, - "loss": 0.6136, + "epoch": 0.62, + "grad_norm": 1.7247365991192332, + "learning_rate": 3.39209990774298e-06, + "loss": 0.5419, "step": 8682 }, { - "epoch": 0.91, - "grad_norm": 2.6518292689904186, - "learning_rate": 1.9402570714296353e-07, - "loss": 0.5471, + "epoch": 0.62, + "grad_norm": 1.8635104929423267, + "learning_rate": 3.391011824237021e-06, + "loss": 0.4947, "step": 8683 }, { - "epoch": 0.91, - "grad_norm": 2.6510281697671734, - "learning_rate": 1.9355583756108408e-07, - "loss": 0.5416, + "epoch": 0.62, + "grad_norm": 0.7630188709424032, + "learning_rate": 3.389923825723269e-06, + "loss": 0.4662, "step": 8684 }, { - "epoch": 0.91, - "grad_norm": 2.7279535357511, - "learning_rate": 1.9308652637799352e-07, - "loss": 0.5669, + "epoch": 0.62, + "grad_norm": 1.6432060861959497, + "learning_rate": 3.388835912259198e-06, + "loss": 0.5169, "step": 8685 }, { - "epoch": 0.91, - "grad_norm": 2.359794373937808, - "learning_rate": 1.9261777364821542e-07, - "loss": 0.6428, + "epoch": 0.62, + "grad_norm": 0.7436378893697764, + "learning_rate": 3.387748083902273e-06, + "loss": 0.44, "step": 8686 }, { - "epoch": 0.91, - "grad_norm": 0.9251054541397337, - "learning_rate": 1.9214957942620738e-07, - "loss": 0.5481, + "epoch": 0.62, + "grad_norm": 1.6815626134445634, + "learning_rate": 3.3866603407099553e-06, + "loss": 0.5345, "step": 8687 }, { - "epoch": 0.91, - "grad_norm": 2.718945841396238, - "learning_rate": 1.9168194376636308e-07, - "loss": 0.6115, + "epoch": 0.62, + "grad_norm": 1.6078976835107763, + "learning_rate": 3.385572682739707e-06, + "loss": 0.5142, "step": 8688 }, { - "epoch": 0.91, - "grad_norm": 2.0986880391335343, - "learning_rate": 1.912148667230107e-07, - "loss": 0.5174, + "epoch": 0.62, + "grad_norm": 1.4752894209064205, + "learning_rate": 3.3844851100489805e-06, + "loss": 0.481, "step": 8689 }, { - "epoch": 0.91, - "grad_norm": 2.4561034728267597, - "learning_rate": 1.9074834835041523e-07, - "loss": 0.6184, + "epoch": 0.62, + "grad_norm": 1.5824417373169573, + "learning_rate": 3.3833976226952236e-06, + "loss": 0.5216, "step": 8690 }, { - "epoch": 0.91, - "grad_norm": 2.3130570678521942, - "learning_rate": 1.9028238870277383e-07, - "loss": 0.5663, + "epoch": 0.62, + "grad_norm": 1.8663350529602494, + "learning_rate": 3.3823102207358806e-06, + "loss": 0.514, "step": 8691 }, { - "epoch": 0.91, - "grad_norm": 2.233247914946738, - "learning_rate": 1.8981698783422154e-07, - "loss": 0.5151, + "epoch": 0.62, + "grad_norm": 1.7372084891678465, + "learning_rate": 3.3812229042283952e-06, + "loss": 0.5772, "step": 8692 }, { - "epoch": 0.91, - "grad_norm": 2.142511538639313, - "learning_rate": 1.8935214579882622e-07, - "loss": 0.6681, + "epoch": 0.62, + "grad_norm": 1.6541724232330821, + "learning_rate": 3.3801356732302017e-06, + "loss": 0.5317, "step": 8693 }, { - "epoch": 0.91, - "grad_norm": 2.3532758793115693, - "learning_rate": 1.88887862650593e-07, - "loss": 0.5731, + "epoch": 0.62, + "grad_norm": 1.8360050281720424, + "learning_rate": 3.37904852779873e-06, + "loss": 0.5287, "step": 8694 }, { - "epoch": 0.91, - "grad_norm": 2.9857413005512314, - "learning_rate": 1.8842413844345986e-07, - "loss": 0.5648, + "epoch": 0.62, + "grad_norm": 1.528874961048527, + "learning_rate": 3.3779614679914065e-06, + "loss": 0.5326, "step": 8695 }, { - "epoch": 0.92, - "grad_norm": 3.9566011801313214, - "learning_rate": 1.8796097323130202e-07, - "loss": 0.5285, + "epoch": 0.62, + "grad_norm": 1.5873512777811323, + "learning_rate": 3.3768744938656557e-06, + "loss": 0.5498, "step": 8696 }, { - "epoch": 0.92, - "grad_norm": 2.808602535252724, - "learning_rate": 1.8749836706792758e-07, - "loss": 0.6272, + "epoch": 0.62, + "grad_norm": 3.2058477143882076, + "learning_rate": 3.375787605478893e-06, + "loss": 0.591, "step": 8697 }, { - "epoch": 0.92, - "grad_norm": 2.524622740136097, - "learning_rate": 1.8703632000708128e-07, - "loss": 0.5818, + "epoch": 0.62, + "grad_norm": 0.7104820141122891, + "learning_rate": 3.374700802888533e-06, + "loss": 0.4612, "step": 8698 }, { - "epoch": 0.92, - "grad_norm": 12.660611398706083, - "learning_rate": 1.8657483210244298e-07, - "loss": 0.6365, + "epoch": 0.62, + "grad_norm": 1.5145771750535428, + "learning_rate": 3.373614086151984e-06, + "loss": 0.4466, "step": 8699 }, { - "epoch": 0.92, - "grad_norm": 2.387834624346501, - "learning_rate": 1.8611390340762647e-07, - "loss": 0.6027, + "epoch": 0.62, + "grad_norm": 1.5531167678437505, + "learning_rate": 3.3725274553266507e-06, + "loss": 0.5019, "step": 8700 }, { - "epoch": 0.92, - "grad_norm": 3.142755060247217, - "learning_rate": 1.8565353397618057e-07, - "loss": 0.6635, + "epoch": 0.62, + "grad_norm": 1.5329267700508526, + "learning_rate": 3.3714409104699317e-06, + "loss": 0.5157, "step": 8701 }, { - "epoch": 0.92, - "grad_norm": 3.134961455919104, - "learning_rate": 1.8519372386159028e-07, - "loss": 0.5493, + "epoch": 0.62, + "grad_norm": 1.9184291329123824, + "learning_rate": 3.3703544516392206e-06, + "loss": 0.4969, "step": 8702 }, { - "epoch": 0.92, - "grad_norm": 2.8210863113852738, - "learning_rate": 1.8473447311727567e-07, - "loss": 0.6306, + "epoch": 0.62, + "grad_norm": 2.302020837304368, + "learning_rate": 3.3692680788919106e-06, + "loss": 0.5808, "step": 8703 }, { - "epoch": 0.92, - "grad_norm": 2.286001395669524, - "learning_rate": 1.8427578179658957e-07, - "loss": 0.5625, + "epoch": 0.62, + "grad_norm": 2.192828168145422, + "learning_rate": 3.3681817922853864e-06, + "loss": 0.5397, "step": 8704 }, { - "epoch": 0.92, - "grad_norm": 2.1320659323325755, - "learning_rate": 1.8381764995282269e-07, - "loss": 0.6272, + "epoch": 0.62, + "grad_norm": 1.7544172058140637, + "learning_rate": 3.3670955918770286e-06, + "loss": 0.4681, "step": 8705 }, { - "epoch": 0.92, - "grad_norm": 2.786721797593951, - "learning_rate": 1.8336007763919916e-07, - "loss": 0.5459, + "epoch": 0.62, + "grad_norm": 1.7326022392870213, + "learning_rate": 3.366009477724214e-06, + "loss": 0.5634, "step": 8706 }, { - "epoch": 0.92, - "grad_norm": 2.9741697703618373, - "learning_rate": 1.8290306490887866e-07, - "loss": 0.6213, + "epoch": 0.62, + "grad_norm": 1.6442635530563818, + "learning_rate": 3.3649234498843176e-06, + "loss": 0.5235, "step": 8707 }, { - "epoch": 0.92, - "grad_norm": 7.392970288943625, - "learning_rate": 1.8244661181495426e-07, - "loss": 0.5805, + "epoch": 0.62, + "grad_norm": 1.7724380689672161, + "learning_rate": 3.3638375084147048e-06, + "loss": 0.5165, "step": 8708 }, { - "epoch": 0.92, - "grad_norm": 1.9781416987342864, - "learning_rate": 1.8199071841045746e-07, - "loss": 0.575, + "epoch": 0.62, + "grad_norm": 1.9819695917615792, + "learning_rate": 3.362751653372738e-06, + "loss": 0.4727, "step": 8709 }, { - "epoch": 0.92, - "grad_norm": 1.9655678631701408, - "learning_rate": 1.8153538474835086e-07, - "loss": 0.6157, + "epoch": 0.62, + "grad_norm": 1.7370736255479442, + "learning_rate": 3.3616658848157756e-06, + "loss": 0.5223, "step": 8710 }, { - "epoch": 0.92, - "grad_norm": 2.1680563446589924, - "learning_rate": 1.810806108815344e-07, - "loss": 0.5988, + "epoch": 0.62, + "grad_norm": 0.6821214642713133, + "learning_rate": 3.360580202801174e-06, + "loss": 0.4152, "step": 8711 }, { - "epoch": 0.92, - "grad_norm": 2.4250575483212824, - "learning_rate": 1.80626396862843e-07, - "loss": 0.5865, + "epoch": 0.62, + "grad_norm": 1.4619705666751812, + "learning_rate": 3.359494607386281e-06, + "loss": 0.4724, "step": 8712 }, { - "epoch": 0.92, - "grad_norm": 2.1845565551153676, - "learning_rate": 1.801727427450445e-07, - "loss": 0.6155, + "epoch": 0.62, + "grad_norm": 2.195350949697797, + "learning_rate": 3.3584090986284413e-06, + "loss": 0.605, "step": 8713 }, { - "epoch": 0.92, - "grad_norm": 2.311124360025313, - "learning_rate": 1.79719648580845e-07, - "loss": 0.626, + "epoch": 0.62, + "grad_norm": 1.5245448970656623, + "learning_rate": 3.3573236765849948e-06, + "loss": 0.5084, "step": 8714 }, { - "epoch": 0.92, - "grad_norm": 0.9616951924685692, - "learning_rate": 1.7926711442288247e-07, - "loss": 0.5346, + "epoch": 0.62, + "grad_norm": 2.6616741485570996, + "learning_rate": 3.356238341313279e-06, + "loss": 0.546, "step": 8715 }, { - "epoch": 0.92, - "grad_norm": 3.549218920031044, - "learning_rate": 1.7881514032373147e-07, - "loss": 0.6247, + "epoch": 0.62, + "grad_norm": 1.7317094974200704, + "learning_rate": 3.3551530928706243e-06, + "loss": 0.5504, "step": 8716 }, { - "epoch": 0.92, - "grad_norm": 2.8740535822228344, - "learning_rate": 1.7836372633590005e-07, - "loss": 0.5935, + "epoch": 0.62, + "grad_norm": 1.6197301181188668, + "learning_rate": 3.3540679313143547e-06, + "loss": 0.5106, "step": 8717 }, { - "epoch": 0.92, - "grad_norm": 5.583700320203049, - "learning_rate": 1.7791287251183398e-07, - "loss": 0.6242, + "epoch": 0.62, + "grad_norm": 1.606982083849189, + "learning_rate": 3.3529828567017964e-06, + "loss": 0.563, "step": 8718 }, { - "epoch": 0.92, - "grad_norm": 2.7565126848470003, - "learning_rate": 1.7746257890391027e-07, - "loss": 0.5967, + "epoch": 0.62, + "grad_norm": 1.9752174382659495, + "learning_rate": 3.3518978690902626e-06, + "loss": 0.5582, "step": 8719 }, { - "epoch": 0.92, - "grad_norm": 3.684123664351955, - "learning_rate": 1.7701284556444377e-07, - "loss": 0.5584, + "epoch": 0.62, + "grad_norm": 1.8474482585100718, + "learning_rate": 3.3508129685370695e-06, + "loss": 0.5758, "step": 8720 }, { - "epoch": 0.92, - "grad_norm": 2.7777890746410563, - "learning_rate": 1.7656367254568374e-07, - "loss": 0.6293, + "epoch": 0.62, + "grad_norm": 1.6622527822775381, + "learning_rate": 3.349728155099522e-06, + "loss": 0.489, "step": 8721 }, { - "epoch": 0.92, - "grad_norm": 2.4453226597465716, - "learning_rate": 1.7611505989981293e-07, - "loss": 0.6327, + "epoch": 0.62, + "grad_norm": 1.709067352864673, + "learning_rate": 3.3486434288349267e-06, + "loss": 0.5109, "step": 8722 }, { - "epoch": 0.92, - "grad_norm": 3.0727605055674445, - "learning_rate": 1.7566700767894906e-07, - "loss": 0.6328, + "epoch": 0.62, + "grad_norm": 1.6588613551054197, + "learning_rate": 3.347558789800581e-06, + "loss": 0.5557, "step": 8723 }, { - "epoch": 0.92, - "grad_norm": 3.726441601765811, - "learning_rate": 1.7521951593514718e-07, - "loss": 0.5472, + "epoch": 0.62, + "grad_norm": 1.6021685648710173, + "learning_rate": 3.3464742380537794e-06, + "loss": 0.4991, "step": 8724 }, { - "epoch": 0.92, - "grad_norm": 3.130050707112883, - "learning_rate": 1.7477258472039517e-07, - "loss": 0.5057, + "epoch": 0.62, + "grad_norm": 2.0258177447801304, + "learning_rate": 3.3453897736518095e-06, + "loss": 0.5238, "step": 8725 }, { - "epoch": 0.92, - "grad_norm": 2.2032936513711974, - "learning_rate": 1.7432621408661532e-07, - "loss": 0.5802, + "epoch": 0.62, + "grad_norm": 2.072276249437479, + "learning_rate": 3.34430539665196e-06, + "loss": 0.5489, "step": 8726 }, { - "epoch": 0.92, - "grad_norm": 2.577694971816813, - "learning_rate": 1.7388040408566674e-07, - "loss": 0.6737, + "epoch": 0.62, + "grad_norm": 1.8725454293532717, + "learning_rate": 3.3432211071115094e-06, + "loss": 0.5501, "step": 8727 }, { - "epoch": 0.92, - "grad_norm": 2.571554032274808, - "learning_rate": 1.7343515476934136e-07, - "loss": 0.5762, + "epoch": 0.62, + "grad_norm": 1.3853614999313302, + "learning_rate": 3.342136905087733e-06, + "loss": 0.5114, "step": 8728 }, { - "epoch": 0.92, - "grad_norm": 2.2675681775342817, - "learning_rate": 1.729904661893683e-07, - "loss": 0.5929, + "epoch": 0.62, + "grad_norm": 1.9139110569609044, + "learning_rate": 3.341052790637903e-06, + "loss": 0.4895, "step": 8729 }, { - "epoch": 0.92, - "grad_norm": 2.4825069172397907, - "learning_rate": 1.725463383974091e-07, - "loss": 0.5898, + "epoch": 0.62, + "grad_norm": 1.9736040968468074, + "learning_rate": 3.339968763819288e-06, + "loss": 0.5661, "step": 8730 }, { - "epoch": 0.92, - "grad_norm": 0.9735035948128292, - "learning_rate": 1.7210277144506182e-07, - "loss": 0.5335, + "epoch": 0.62, + "grad_norm": 2.947135807016129, + "learning_rate": 3.338884824689147e-06, + "loss": 0.4838, "step": 8731 }, { - "epoch": 0.92, - "grad_norm": 2.631721683378093, - "learning_rate": 1.7165976538385753e-07, - "loss": 0.5517, + "epoch": 0.62, + "grad_norm": 0.7306704436808996, + "learning_rate": 3.3378009733047354e-06, + "loss": 0.4083, "step": 8732 }, { - "epoch": 0.92, - "grad_norm": 1.0076500063785254, - "learning_rate": 1.7121732026526506e-07, - "loss": 0.5407, + "epoch": 0.62, + "grad_norm": 2.0070891191150984, + "learning_rate": 3.3367172097233117e-06, + "loss": 0.5441, "step": 8733 }, { - "epoch": 0.92, - "grad_norm": 3.581508418879825, - "learning_rate": 1.7077543614068604e-07, - "loss": 0.6803, + "epoch": 0.62, + "grad_norm": 1.5771871603849519, + "learning_rate": 3.33563353400212e-06, + "loss": 0.5367, "step": 8734 }, { - "epoch": 0.92, - "grad_norm": 2.4861781722789957, - "learning_rate": 1.703341130614572e-07, - "loss": 0.6355, + "epoch": 0.62, + "grad_norm": 1.9145345593286793, + "learning_rate": 3.3345499461984053e-06, + "loss": 0.5791, "step": 8735 }, { - "epoch": 0.92, - "grad_norm": 3.515627205530569, - "learning_rate": 1.6989335107884863e-07, - "loss": 0.6033, + "epoch": 0.62, + "grad_norm": 1.8357422563510664, + "learning_rate": 3.3334664463694038e-06, + "loss": 0.5064, "step": 8736 }, { - "epoch": 0.92, - "grad_norm": 2.3777058580194343, - "learning_rate": 1.6945315024406883e-07, - "loss": 0.513, + "epoch": 0.62, + "grad_norm": 1.7345692670567234, + "learning_rate": 3.332383034572354e-06, + "loss": 0.5641, "step": 8737 }, { - "epoch": 0.92, - "grad_norm": 0.8876260556728341, - "learning_rate": 1.6901351060825854e-07, - "loss": 0.5056, + "epoch": 0.62, + "grad_norm": 1.4792210902259442, + "learning_rate": 3.331299710864483e-06, + "loss": 0.4185, "step": 8738 }, { - "epoch": 0.92, - "grad_norm": 2.7599871753116862, - "learning_rate": 1.685744322224936e-07, - "loss": 0.5685, + "epoch": 0.62, + "grad_norm": 0.6983032508015882, + "learning_rate": 3.330216475303015e-06, + "loss": 0.4305, "step": 8739 }, { - "epoch": 0.92, - "grad_norm": 3.2326213858288804, - "learning_rate": 1.681359151377848e-07, - "loss": 0.5538, + "epoch": 0.62, + "grad_norm": 2.014625610080385, + "learning_rate": 3.329133327945169e-06, + "loss": 0.4754, "step": 8740 }, { - "epoch": 0.92, - "grad_norm": 2.901673864970551, - "learning_rate": 1.676979594050776e-07, - "loss": 0.6152, + "epoch": 0.62, + "grad_norm": 1.7267475796402738, + "learning_rate": 3.328050268848164e-06, + "loss": 0.5315, "step": 8741 }, { - "epoch": 0.92, - "grad_norm": 0.8826659096790682, - "learning_rate": 1.6726056507525347e-07, - "loss": 0.5152, + "epoch": 0.62, + "grad_norm": 0.7331320059676318, + "learning_rate": 3.3269672980692088e-06, + "loss": 0.4193, "step": 8742 }, { - "epoch": 0.92, - "grad_norm": 2.665982233334478, - "learning_rate": 1.668237321991262e-07, - "loss": 0.5803, + "epoch": 0.62, + "grad_norm": 1.8977198556168882, + "learning_rate": 3.32588441566551e-06, + "loss": 0.6017, "step": 8743 }, { - "epoch": 0.92, - "grad_norm": 2.5198209538212146, - "learning_rate": 1.6638746082744684e-07, - "loss": 0.6382, + "epoch": 0.62, + "grad_norm": 2.028536633135678, + "learning_rate": 3.324801621694268e-06, + "loss": 0.5636, "step": 8744 }, { - "epoch": 0.92, - "grad_norm": 2.568498917912977, - "learning_rate": 1.6595175101089877e-07, - "loss": 0.6567, + "epoch": 0.62, + "grad_norm": 1.8173604061545672, + "learning_rate": 3.3237189162126824e-06, + "loss": 0.5427, "step": 8745 }, { - "epoch": 0.92, - "grad_norm": 3.409410318358604, - "learning_rate": 1.6551660280010316e-07, - "loss": 0.6195, + "epoch": 0.62, + "grad_norm": 1.6062393326220585, + "learning_rate": 3.322636299277944e-06, + "loss": 0.5804, "step": 8746 }, { - "epoch": 0.92, - "grad_norm": 2.719189581656458, - "learning_rate": 1.6508201624561404e-07, - "loss": 0.6318, + "epoch": 0.62, + "grad_norm": 1.8896105544901673, + "learning_rate": 3.321553770947237e-06, + "loss": 0.6159, "step": 8747 }, { - "epoch": 0.92, - "grad_norm": 9.214568272301674, - "learning_rate": 1.6464799139791877e-07, - "loss": 0.5042, + "epoch": 0.62, + "grad_norm": 1.570061667820974, + "learning_rate": 3.32047133127775e-06, + "loss": 0.5648, "step": 8748 }, { - "epoch": 0.92, - "grad_norm": 4.086249960723928, - "learning_rate": 1.6421452830744366e-07, - "loss": 0.6093, + "epoch": 0.62, + "grad_norm": 1.8521180071137868, + "learning_rate": 3.3193889803266568e-06, + "loss": 0.5031, "step": 8749 }, { - "epoch": 0.92, - "grad_norm": 3.992792950152544, - "learning_rate": 1.6378162702454458e-07, - "loss": 0.5714, + "epoch": 0.62, + "grad_norm": 1.8867655262847396, + "learning_rate": 3.3183067181511336e-06, + "loss": 0.59, "step": 8750 }, { - "epoch": 0.92, - "grad_norm": 4.417350418321379, - "learning_rate": 1.6334928759951684e-07, - "loss": 0.6329, + "epoch": 0.62, + "grad_norm": 1.3955502056782982, + "learning_rate": 3.317224544808346e-06, + "loss": 0.4875, "step": 8751 }, { - "epoch": 0.92, - "grad_norm": 2.3306711526974686, - "learning_rate": 1.6291751008258693e-07, - "loss": 0.5744, + "epoch": 0.62, + "grad_norm": 1.503660982162972, + "learning_rate": 3.3161424603554614e-06, + "loss": 0.5118, "step": 8752 }, { - "epoch": 0.92, - "grad_norm": 2.8025552214715774, - "learning_rate": 1.6248629452391862e-07, - "loss": 0.5275, + "epoch": 0.62, + "grad_norm": 2.777343821614377, + "learning_rate": 3.315060464849639e-06, + "loss": 0.4984, "step": 8753 }, { - "epoch": 0.92, - "grad_norm": 2.15974372047649, - "learning_rate": 1.620556409736085e-07, - "loss": 0.6049, + "epoch": 0.62, + "grad_norm": 0.6430606160987455, + "learning_rate": 3.313978558348031e-06, + "loss": 0.4232, "step": 8754 }, { - "epoch": 0.92, - "grad_norm": 3.655812836017327, - "learning_rate": 1.616255494816893e-07, - "loss": 0.5968, + "epoch": 0.62, + "grad_norm": 1.7355417784496763, + "learning_rate": 3.3128967409077865e-06, + "loss": 0.5279, "step": 8755 }, { - "epoch": 0.92, - "grad_norm": 2.7698264187396346, - "learning_rate": 1.6119602009812663e-07, - "loss": 0.5531, + "epoch": 0.62, + "grad_norm": 1.6780742067650607, + "learning_rate": 3.3118150125860554e-06, + "loss": 0.4869, "step": 8756 }, { - "epoch": 0.92, - "grad_norm": 3.424978511207146, - "learning_rate": 1.6076705287282336e-07, - "loss": 0.6074, + "epoch": 0.62, + "grad_norm": 1.7744423075096543, + "learning_rate": 3.3107333734399745e-06, + "loss": 0.4757, "step": 8757 }, { - "epoch": 0.92, - "grad_norm": 2.5086511137119807, - "learning_rate": 1.6033864785561515e-07, - "loss": 0.567, + "epoch": 0.62, + "grad_norm": 1.5184488750126515, + "learning_rate": 3.309651823526682e-06, + "loss": 0.5324, "step": 8758 }, { - "epoch": 0.92, - "grad_norm": 2.707797137818906, - "learning_rate": 1.5991080509627222e-07, - "loss": 0.6671, + "epoch": 0.62, + "grad_norm": 0.7175830555028696, + "learning_rate": 3.3085703629033054e-06, + "loss": 0.4299, "step": 8759 }, { - "epoch": 0.92, - "grad_norm": 2.3155332443393775, - "learning_rate": 1.5948352464450146e-07, - "loss": 0.5503, + "epoch": 0.62, + "grad_norm": 1.7502343041082078, + "learning_rate": 3.307488991626976e-06, + "loss": 0.527, "step": 8760 }, { - "epoch": 0.92, - "grad_norm": 3.2510331858276005, - "learning_rate": 1.590568065499415e-07, - "loss": 0.6343, + "epoch": 0.62, + "grad_norm": 1.7803509969246603, + "learning_rate": 3.3064077097548132e-06, + "loss": 0.5284, "step": 8761 }, { - "epoch": 0.92, - "grad_norm": 2.6772676116951213, - "learning_rate": 1.5863065086216878e-07, - "loss": 0.5982, + "epoch": 0.62, + "grad_norm": 1.5667037330781803, + "learning_rate": 3.3053265173439313e-06, + "loss": 0.5422, "step": 8762 }, { - "epoch": 0.92, - "grad_norm": 2.394546302211017, - "learning_rate": 1.582050576306915e-07, - "loss": 0.6014, + "epoch": 0.62, + "grad_norm": 1.7612782928933564, + "learning_rate": 3.304245414451448e-06, + "loss": 0.5255, "step": 8763 }, { - "epoch": 0.92, - "grad_norm": 2.0972459059941415, - "learning_rate": 1.5778002690495453e-07, - "loss": 0.5364, + "epoch": 0.62, + "grad_norm": 1.6949691001918408, + "learning_rate": 3.3031644011344656e-06, + "loss": 0.5351, "step": 8764 }, { - "epoch": 0.92, - "grad_norm": 2.9104609457881114, - "learning_rate": 1.5735555873433673e-07, - "loss": 0.5498, + "epoch": 0.62, + "grad_norm": 1.8582294466256635, + "learning_rate": 3.302083477450091e-06, + "loss": 0.5148, "step": 8765 }, { - "epoch": 0.92, - "grad_norm": 3.1530370674231816, - "learning_rate": 1.569316531681514e-07, - "loss": 0.6073, + "epoch": 0.62, + "grad_norm": 1.6699717830493317, + "learning_rate": 3.301002643455419e-06, + "loss": 0.5067, "step": 8766 }, { - "epoch": 0.92, - "grad_norm": 2.348747898326347, - "learning_rate": 1.565083102556464e-07, - "loss": 0.5991, + "epoch": 0.62, + "grad_norm": 3.1653379266711354, + "learning_rate": 3.2999218992075454e-06, + "loss": 0.4735, "step": 8767 }, { - "epoch": 0.92, - "grad_norm": 2.64211735957492, - "learning_rate": 1.560855300460057e-07, - "loss": 0.5962, + "epoch": 0.62, + "grad_norm": 1.6440443825412594, + "learning_rate": 3.2988412447635576e-06, + "loss": 0.5807, "step": 8768 }, { - "epoch": 0.92, - "grad_norm": 2.5983785487904294, - "learning_rate": 1.5566331258834498e-07, - "loss": 0.6021, + "epoch": 0.62, + "grad_norm": 1.9731577030414527, + "learning_rate": 3.2977606801805405e-06, + "loss": 0.5098, "step": 8769 }, { - "epoch": 0.92, - "grad_norm": 3.48505144340559, - "learning_rate": 1.552416579317173e-07, - "loss": 0.5457, + "epoch": 0.62, + "grad_norm": 0.7586272671470367, + "learning_rate": 3.29668020551557e-06, + "loss": 0.4173, "step": 8770 }, { - "epoch": 0.92, - "grad_norm": 4.118622729954412, - "learning_rate": 1.5482056612510898e-07, - "loss": 0.5525, + "epoch": 0.62, + "grad_norm": 2.1153836239405117, + "learning_rate": 3.2955998208257233e-06, + "loss": 0.5362, "step": 8771 }, { - "epoch": 0.92, - "grad_norm": 2.453328716580977, - "learning_rate": 1.544000372174409e-07, - "loss": 0.5662, + "epoch": 0.62, + "grad_norm": 4.124606752774784, + "learning_rate": 3.2945195261680707e-06, + "loss": 0.5079, "step": 8772 }, { - "epoch": 0.92, - "grad_norm": 2.715493793911031, - "learning_rate": 1.539800712575701e-07, - "loss": 0.6442, + "epoch": 0.62, + "grad_norm": 1.8262355093624791, + "learning_rate": 3.293439321599675e-06, + "loss": 0.4998, "step": 8773 }, { - "epoch": 0.92, - "grad_norm": 2.534362203603708, - "learning_rate": 1.5356066829428529e-07, - "loss": 0.5944, + "epoch": 0.62, + "grad_norm": 1.613514762223597, + "learning_rate": 3.2923592071775957e-06, + "loss": 0.5172, "step": 8774 }, { - "epoch": 0.92, - "grad_norm": 2.7191866220592633, - "learning_rate": 1.531418283763131e-07, - "loss": 0.605, + "epoch": 0.62, + "grad_norm": 1.8108967659185795, + "learning_rate": 3.2912791829588907e-06, + "loss": 0.5209, "step": 8775 }, { - "epoch": 0.92, - "grad_norm": 3.6751125356427465, - "learning_rate": 1.5272355155231233e-07, - "loss": 0.5287, + "epoch": 0.62, + "grad_norm": 2.144065452721013, + "learning_rate": 3.2901992490006094e-06, + "loss": 0.5653, "step": 8776 }, { - "epoch": 0.92, - "grad_norm": 2.6076140742413605, - "learning_rate": 1.5230583787087693e-07, - "loss": 0.6233, + "epoch": 0.62, + "grad_norm": 1.4240217852935155, + "learning_rate": 3.2891194053597963e-06, + "loss": 0.5072, "step": 8777 }, { - "epoch": 0.92, - "grad_norm": 3.2526568139523055, - "learning_rate": 1.5188868738053643e-07, - "loss": 0.6007, + "epoch": 0.62, + "grad_norm": 2.0040727027960568, + "learning_rate": 3.288039652093494e-06, + "loss": 0.5338, "step": 8778 }, { - "epoch": 0.92, - "grad_norm": 2.556172706540122, - "learning_rate": 1.5147210012975366e-07, - "loss": 0.5975, + "epoch": 0.62, + "grad_norm": 1.581623467424679, + "learning_rate": 3.2869599892587386e-06, + "loss": 0.5379, "step": 8779 }, { - "epoch": 0.92, - "grad_norm": 2.317968435237044, - "learning_rate": 1.5105607616692665e-07, - "loss": 0.5119, + "epoch": 0.62, + "grad_norm": 2.450278753159194, + "learning_rate": 3.2858804169125614e-06, + "loss": 0.5458, "step": 8780 }, { - "epoch": 0.92, - "grad_norm": 2.364058422346575, - "learning_rate": 1.5064061554038723e-07, - "loss": 0.6071, + "epoch": 0.62, + "grad_norm": 1.962922809515889, + "learning_rate": 3.284800935111987e-06, + "loss": 0.6345, "step": 8781 }, { - "epoch": 0.92, - "grad_norm": 2.123856213413403, - "learning_rate": 1.5022571829840404e-07, - "loss": 0.5481, + "epoch": 0.62, + "grad_norm": 2.9471520646468976, + "learning_rate": 3.2837215439140408e-06, + "loss": 0.4892, "step": 8782 }, { - "epoch": 0.92, - "grad_norm": 2.5488265654803324, - "learning_rate": 1.4981138448917686e-07, - "loss": 0.5901, + "epoch": 0.62, + "grad_norm": 1.7706037224192066, + "learning_rate": 3.282642243375738e-06, + "loss": 0.569, "step": 8783 }, { - "epoch": 0.92, - "grad_norm": 3.9357992577530965, - "learning_rate": 1.4939761416084274e-07, - "loss": 0.5701, + "epoch": 0.62, + "grad_norm": 1.6486145798614509, + "learning_rate": 3.2815630335540914e-06, + "loss": 0.5987, "step": 8784 }, { - "epoch": 0.92, - "grad_norm": 2.0256367627019287, - "learning_rate": 1.4898440736147213e-07, - "loss": 0.6026, + "epoch": 0.62, + "grad_norm": 1.6464281226010138, + "learning_rate": 3.280483914506106e-06, + "loss": 0.5133, "step": 8785 }, { - "epoch": 0.92, - "grad_norm": 4.257081804924682, - "learning_rate": 1.4857176413907048e-07, - "loss": 0.5129, + "epoch": 0.62, + "grad_norm": 1.7564117321545594, + "learning_rate": 3.2794048862887883e-06, + "loss": 0.4696, "step": 8786 }, { - "epoch": 0.92, - "grad_norm": 2.7139299347714076, - "learning_rate": 1.481596845415767e-07, - "loss": 0.5782, + "epoch": 0.62, + "grad_norm": 1.651781603556541, + "learning_rate": 3.2783259489591358e-06, + "loss": 0.4591, "step": 8787 }, { - "epoch": 0.92, - "grad_norm": 2.4990484620871873, - "learning_rate": 1.4774816861686636e-07, - "loss": 0.6671, + "epoch": 0.62, + "grad_norm": 2.2172293826649137, + "learning_rate": 3.2772471025741402e-06, + "loss": 0.5185, "step": 8788 }, { - "epoch": 0.92, - "grad_norm": 2.9970898191679884, - "learning_rate": 1.4733721641274677e-07, - "loss": 0.653, + "epoch": 0.62, + "grad_norm": 1.7221375168178839, + "learning_rate": 3.276168347190788e-06, + "loss": 0.5016, "step": 8789 }, { - "epoch": 0.92, - "grad_norm": 3.3568839354768514, - "learning_rate": 1.4692682797696201e-07, - "loss": 0.5761, + "epoch": 0.62, + "grad_norm": 2.481062365562745, + "learning_rate": 3.2750896828660665e-06, + "loss": 0.5287, "step": 8790 }, { - "epoch": 0.93, - "grad_norm": 2.61930351344336, - "learning_rate": 1.4651700335718887e-07, - "loss": 0.6616, + "epoch": 0.62, + "grad_norm": 1.6989048115097212, + "learning_rate": 3.274011109656954e-06, + "loss": 0.589, "step": 8791 }, { - "epoch": 0.93, - "grad_norm": 0.9479674201636344, - "learning_rate": 1.4610774260104155e-07, - "loss": 0.5103, + "epoch": 0.62, + "grad_norm": 1.6073771656780032, + "learning_rate": 3.2729326276204198e-06, + "loss": 0.4542, "step": 8792 }, { - "epoch": 0.93, - "grad_norm": 2.5330814701077635, - "learning_rate": 1.456990457560642e-07, - "loss": 0.5497, + "epoch": 0.62, + "grad_norm": 4.428510629422196, + "learning_rate": 3.2718542368134387e-06, + "loss": 0.4972, "step": 8793 }, { - "epoch": 0.93, - "grad_norm": 2.309748805294827, - "learning_rate": 1.4529091286973994e-07, - "loss": 0.6107, + "epoch": 0.62, + "grad_norm": 2.1827445015129916, + "learning_rate": 3.2707759372929717e-06, + "loss": 0.4738, "step": 8794 }, { - "epoch": 0.93, - "grad_norm": 2.547398975416799, - "learning_rate": 1.4488334398948424e-07, - "loss": 0.6341, + "epoch": 0.62, + "grad_norm": 1.6728501731270036, + "learning_rate": 3.26969772911598e-06, + "loss": 0.5558, "step": 8795 }, { - "epoch": 0.93, - "grad_norm": 2.0790737085851827, - "learning_rate": 1.444763391626458e-07, - "loss": 0.5955, + "epoch": 0.62, + "grad_norm": 1.5106552761137235, + "learning_rate": 3.2686196123394153e-06, + "loss": 0.4885, "step": 8796 }, { - "epoch": 0.93, - "grad_norm": 2.7131868065415845, - "learning_rate": 1.4406989843651186e-07, - "loss": 0.6069, + "epoch": 0.62, + "grad_norm": 1.7491874422972191, + "learning_rate": 3.2675415870202316e-06, + "loss": 0.4892, "step": 8797 }, { - "epoch": 0.93, - "grad_norm": 2.1351827973252804, - "learning_rate": 1.4366402185829852e-07, - "loss": 0.5892, + "epoch": 0.62, + "grad_norm": 2.39076749973051, + "learning_rate": 3.2664636532153714e-06, + "loss": 0.5415, "step": 8798 }, { - "epoch": 0.93, - "grad_norm": 2.532366496256515, - "learning_rate": 1.4325870947516195e-07, - "loss": 0.6275, - "step": 8799 + "epoch": 0.62, + "grad_norm": 1.8904385402759394, + "learning_rate": 3.265385810981774e-06, + "loss": 0.5233, + "step": 8799 }, { - "epoch": 0.93, - "grad_norm": 3.5352178877025415, - "learning_rate": 1.4285396133418894e-07, - "loss": 0.6247, + "epoch": 0.62, + "grad_norm": 1.9259245446769644, + "learning_rate": 3.264308060376376e-06, + "loss": 0.4875, "step": 8800 }, { - "epoch": 0.93, - "grad_norm": 2.3725771799289634, - "learning_rate": 1.424497774824024e-07, - "loss": 0.5422, + "epoch": 0.62, + "grad_norm": 1.5332293840163687, + "learning_rate": 3.2632304014561078e-06, + "loss": 0.5277, "step": 8801 }, { - "epoch": 0.93, - "grad_norm": 2.453635363952836, - "learning_rate": 1.4204615796675813e-07, - "loss": 0.6249, + "epoch": 0.62, + "grad_norm": 1.6325759527138293, + "learning_rate": 3.262152834277895e-06, + "loss": 0.5003, "step": 8802 }, { - "epoch": 0.93, - "grad_norm": 3.6864136459930834, - "learning_rate": 1.4164310283414917e-07, - "loss": 0.6058, + "epoch": 0.62, + "grad_norm": 1.6709570828979896, + "learning_rate": 3.261075358898659e-06, + "loss": 0.4917, "step": 8803 }, { - "epoch": 0.93, - "grad_norm": 2.5101928312119233, - "learning_rate": 1.4124061213139973e-07, - "loss": 0.674, + "epoch": 0.62, + "grad_norm": 1.9396535043472216, + "learning_rate": 3.259997975375312e-06, + "loss": 0.5664, "step": 8804 }, { - "epoch": 0.93, - "grad_norm": 2.5816393554893198, - "learning_rate": 1.4083868590527128e-07, - "loss": 0.56, + "epoch": 0.62, + "grad_norm": 1.7899186275209775, + "learning_rate": 3.2589206837647704e-06, + "loss": 0.5012, "step": 8805 }, { - "epoch": 0.93, - "grad_norm": 1.008204586126537, - "learning_rate": 1.4043732420245703e-07, - "loss": 0.5047, + "epoch": 0.62, + "grad_norm": 0.7058695582727664, + "learning_rate": 3.2578434841239376e-06, + "loss": 0.4368, "step": 8806 }, { - "epoch": 0.93, - "grad_norm": 2.6435612166868547, - "learning_rate": 1.400365270695875e-07, - "loss": 0.6431, + "epoch": 0.62, + "grad_norm": 1.7069594216857484, + "learning_rate": 3.2567663765097147e-06, + "loss": 0.4748, "step": 8807 }, { - "epoch": 0.93, - "grad_norm": 2.3837590071708403, - "learning_rate": 1.3963629455322536e-07, - "loss": 0.6289, + "epoch": 0.63, + "grad_norm": 1.7140813372191206, + "learning_rate": 3.255689360978998e-06, + "loss": 0.5009, "step": 8808 }, { - "epoch": 0.93, - "grad_norm": 2.2165814241637776, - "learning_rate": 1.3923662669986847e-07, - "loss": 0.6364, + "epoch": 0.63, + "grad_norm": 1.963009395356615, + "learning_rate": 3.25461243758868e-06, + "loss": 0.5472, "step": 8809 }, { - "epoch": 0.93, - "grad_norm": 3.8103240249394665, - "learning_rate": 1.388375235559497e-07, - "loss": 0.6198, + "epoch": 0.63, + "grad_norm": 1.6029690176009699, + "learning_rate": 3.2535356063956487e-06, + "loss": 0.513, "step": 8810 }, { - "epoch": 0.93, - "grad_norm": 2.745817288882361, - "learning_rate": 1.3843898516783528e-07, - "loss": 0.6645, + "epoch": 0.63, + "grad_norm": 2.200908604342597, + "learning_rate": 3.252458867456782e-06, + "loss": 0.5643, "step": 8811 }, { - "epoch": 0.93, - "grad_norm": 0.9439152142723546, - "learning_rate": 1.3804101158182592e-07, - "loss": 0.5582, + "epoch": 0.63, + "grad_norm": 1.5890209256493737, + "learning_rate": 3.2513822208289613e-06, + "loss": 0.5055, "step": 8812 }, { - "epoch": 0.93, - "grad_norm": 2.2299912321829636, - "learning_rate": 1.3764360284415745e-07, - "loss": 0.5961, + "epoch": 0.63, + "grad_norm": 1.723558186386323, + "learning_rate": 3.2503056665690567e-06, + "loss": 0.5412, "step": 8813 }, { - "epoch": 0.93, - "grad_norm": 3.274213469206632, - "learning_rate": 1.372467590009996e-07, - "loss": 0.6478, + "epoch": 0.63, + "grad_norm": 1.6174319172625327, + "learning_rate": 3.2492292047339352e-06, + "loss": 0.5176, "step": 8814 }, { - "epoch": 0.93, - "grad_norm": 2.736445144043836, - "learning_rate": 1.3685048009845602e-07, - "loss": 0.5455, + "epoch": 0.63, + "grad_norm": 1.5533850945307162, + "learning_rate": 3.248152835380459e-06, + "loss": 0.4897, "step": 8815 }, { - "epoch": 0.93, - "grad_norm": 2.3511706665739376, - "learning_rate": 1.3645476618256658e-07, - "loss": 0.6518, + "epoch": 0.63, + "grad_norm": 1.692046650517022, + "learning_rate": 3.247076558565487e-06, + "loss": 0.5218, "step": 8816 }, { - "epoch": 0.93, - "grad_norm": 2.9759636145549075, - "learning_rate": 1.3605961729930283e-07, - "loss": 0.6208, + "epoch": 0.63, + "grad_norm": 2.126519466295127, + "learning_rate": 3.2460003743458713e-06, + "loss": 0.4877, "step": 8817 }, { - "epoch": 0.93, - "grad_norm": 2.6395425552448972, - "learning_rate": 1.3566503349457193e-07, - "loss": 0.5986, + "epoch": 0.63, + "grad_norm": 1.6737946007985132, + "learning_rate": 3.2449242827784587e-06, + "loss": 0.4668, "step": 8818 }, { - "epoch": 0.93, - "grad_norm": 2.5256137921850588, - "learning_rate": 1.3527101481421722e-07, - "loss": 0.5586, + "epoch": 0.63, + "grad_norm": 1.9136544766049641, + "learning_rate": 3.2438482839200913e-06, + "loss": 0.5802, "step": 8819 }, { - "epoch": 0.93, - "grad_norm": 2.365812363582891, - "learning_rate": 1.3487756130401264e-07, - "loss": 0.5482, + "epoch": 0.63, + "grad_norm": 1.7763654441412375, + "learning_rate": 3.24277237782761e-06, + "loss": 0.5285, "step": 8820 }, { - "epoch": 0.93, - "grad_norm": 2.41157315584283, - "learning_rate": 1.3448467300966995e-07, - "loss": 0.5167, + "epoch": 0.63, + "grad_norm": 1.6172640032642618, + "learning_rate": 3.2416965645578454e-06, + "loss": 0.5531, "step": 8821 }, { - "epoch": 0.93, - "grad_norm": 2.350934489365909, - "learning_rate": 1.3409234997683262e-07, - "loss": 0.5623, + "epoch": 0.63, + "grad_norm": 1.5639062136345954, + "learning_rate": 3.240620844167626e-06, + "loss": 0.5114, "step": 8822 }, { - "epoch": 0.93, - "grad_norm": 2.8590303256325225, - "learning_rate": 1.3370059225108088e-07, - "loss": 0.615, + "epoch": 0.63, + "grad_norm": 1.735754373471776, + "learning_rate": 3.2395452167137733e-06, + "loss": 0.5243, "step": 8823 }, { - "epoch": 0.93, - "grad_norm": 2.477586205368679, - "learning_rate": 1.3330939987792668e-07, - "loss": 0.6072, + "epoch": 0.63, + "grad_norm": 1.5071975487230058, + "learning_rate": 3.2384696822531103e-06, + "loss": 0.5269, "step": 8824 }, { - "epoch": 0.93, - "grad_norm": 2.401994252587328, - "learning_rate": 1.3291877290281864e-07, - "loss": 0.574, + "epoch": 0.63, + "grad_norm": 1.8411605995400482, + "learning_rate": 3.237394240842447e-06, + "loss": 0.5435, "step": 8825 }, { - "epoch": 0.93, - "grad_norm": 3.7781870354990517, - "learning_rate": 1.3252871137113764e-07, - "loss": 0.56, + "epoch": 0.63, + "grad_norm": 1.8098605838646344, + "learning_rate": 3.2363188925385907e-06, + "loss": 0.5289, "step": 8826 }, { - "epoch": 0.93, - "grad_norm": 2.367412583097113, - "learning_rate": 1.3213921532820084e-07, - "loss": 0.678, + "epoch": 0.63, + "grad_norm": 1.5447150297580017, + "learning_rate": 3.2352436373983488e-06, + "loss": 0.5708, "step": 8827 }, { - "epoch": 0.93, - "grad_norm": 1.9230400310337836, - "learning_rate": 1.3175028481925865e-07, - "loss": 0.6137, + "epoch": 0.63, + "grad_norm": 1.5727054311043593, + "learning_rate": 3.2341684754785173e-06, + "loss": 0.5299, "step": 8828 }, { - "epoch": 0.93, - "grad_norm": 2.6899938815586157, - "learning_rate": 1.3136191988949498e-07, - "loss": 0.5719, + "epoch": 0.63, + "grad_norm": 2.371638342278786, + "learning_rate": 3.23309340683589e-06, + "loss": 0.5328, "step": 8829 }, { - "epoch": 0.93, - "grad_norm": 2.313782561671406, - "learning_rate": 1.3097412058403036e-07, - "loss": 0.6413, + "epoch": 0.63, + "grad_norm": 1.698520171207794, + "learning_rate": 3.2320184315272558e-06, + "loss": 0.5227, "step": 8830 }, { - "epoch": 0.93, - "grad_norm": 2.3968960300093514, - "learning_rate": 1.305868869479171e-07, - "loss": 0.6002, + "epoch": 0.63, + "grad_norm": 1.6490462139394768, + "learning_rate": 3.230943549609399e-06, + "loss": 0.5486, "step": 8831 }, { - "epoch": 0.93, - "grad_norm": 0.9061105754688217, - "learning_rate": 1.3020021902614366e-07, - "loss": 0.5138, + "epoch": 0.63, + "grad_norm": 2.6099302354869924, + "learning_rate": 3.2298687611390988e-06, + "loss": 0.5014, "step": 8832 }, { - "epoch": 0.93, - "grad_norm": 3.361235492021561, - "learning_rate": 1.2981411686363132e-07, - "loss": 0.6635, + "epoch": 0.63, + "grad_norm": 1.6012021741636442, + "learning_rate": 3.2287940661731286e-06, + "loss": 0.4731, "step": 8833 }, { - "epoch": 0.93, - "grad_norm": 2.299548642666442, - "learning_rate": 1.29428580505237e-07, - "loss": 0.574, + "epoch": 0.63, + "grad_norm": 3.7883477216278996, + "learning_rate": 3.227719464768256e-06, + "loss": 0.4982, "step": 8834 }, { - "epoch": 0.93, - "grad_norm": 2.8233240276159557, - "learning_rate": 1.290436099957504e-07, - "loss": 0.6384, + "epoch": 0.63, + "grad_norm": 1.6168369271411065, + "learning_rate": 3.2266449569812485e-06, + "loss": 0.5363, "step": 8835 }, { - "epoch": 0.93, - "grad_norm": 2.4935098889156753, - "learning_rate": 1.2865920537989683e-07, - "loss": 0.5546, + "epoch": 0.63, + "grad_norm": 1.7955985782876087, + "learning_rate": 3.2255705428688633e-06, + "loss": 0.5187, "step": 8836 }, { - "epoch": 0.93, - "grad_norm": 2.7213656000819872, - "learning_rate": 1.2827536670233508e-07, - "loss": 0.6305, + "epoch": 0.63, + "grad_norm": 1.667124828339718, + "learning_rate": 3.2244962224878527e-06, + "loss": 0.4947, "step": 8837 }, { - "epoch": 0.93, - "grad_norm": 2.741318595224126, - "learning_rate": 1.2789209400765889e-07, - "loss": 0.6613, + "epoch": 0.63, + "grad_norm": 1.7950764707758937, + "learning_rate": 3.223421995894968e-06, + "loss": 0.497, "step": 8838 }, { - "epoch": 0.93, - "grad_norm": 3.396722910728486, - "learning_rate": 1.2750938734039486e-07, - "loss": 0.5485, + "epoch": 0.63, + "grad_norm": 1.7778013002698017, + "learning_rate": 3.2223478631469553e-06, + "loss": 0.4968, "step": 8839 }, { - "epoch": 0.93, - "grad_norm": 2.597262875682383, - "learning_rate": 1.2712724674500575e-07, - "loss": 0.6165, + "epoch": 0.63, + "grad_norm": 2.223399310624077, + "learning_rate": 3.2212738243005513e-06, + "loss": 0.5274, "step": 8840 }, { - "epoch": 0.93, - "grad_norm": 2.5915988746408636, - "learning_rate": 1.2674567226588662e-07, - "loss": 0.658, + "epoch": 0.63, + "grad_norm": 2.143800184782339, + "learning_rate": 3.22019987941249e-06, + "loss": 0.4535, "step": 8841 }, { - "epoch": 0.93, - "grad_norm": 12.953891578992364, - "learning_rate": 1.2636466394736758e-07, - "loss": 0.5931, + "epoch": 0.63, + "grad_norm": 2.224501186883041, + "learning_rate": 3.219126028539503e-06, + "loss": 0.5213, "step": 8842 }, { - "epoch": 0.93, - "grad_norm": 4.102318058166209, - "learning_rate": 1.2598422183371484e-07, - "loss": 0.5928, + "epoch": 0.63, + "grad_norm": 2.236901032845963, + "learning_rate": 3.218052271738314e-06, + "loss": 0.5035, "step": 8843 }, { - "epoch": 0.93, - "grad_norm": 3.2543978164700937, - "learning_rate": 1.256043459691253e-07, - "loss": 0.6385, + "epoch": 0.63, + "grad_norm": 1.9849777726078448, + "learning_rate": 3.216978609065641e-06, + "loss": 0.5525, "step": 8844 }, { - "epoch": 0.93, - "grad_norm": 0.9749011154837868, - "learning_rate": 1.2522503639773254e-07, - "loss": 0.5538, + "epoch": 0.63, + "grad_norm": 1.72619192613113, + "learning_rate": 3.2159050405781996e-06, + "loss": 0.5312, "step": 8845 }, { - "epoch": 0.93, - "grad_norm": 2.8982951468251628, - "learning_rate": 1.2484629316360297e-07, - "loss": 0.5987, + "epoch": 0.63, + "grad_norm": 2.428814182269306, + "learning_rate": 3.214831566332699e-06, + "loss": 0.5307, "step": 8846 }, { - "epoch": 0.93, - "grad_norm": 4.042274517432498, - "learning_rate": 1.244681163107392e-07, - "loss": 0.5936, + "epoch": 0.63, + "grad_norm": 2.1267841643936136, + "learning_rate": 3.2137581863858453e-06, + "loss": 0.4888, "step": 8847 }, { - "epoch": 0.93, - "grad_norm": 2.543924226290593, - "learning_rate": 1.2409050588307547e-07, - "loss": 0.6316, + "epoch": 0.63, + "grad_norm": 2.7612964070781665, + "learning_rate": 3.2126849007943365e-06, + "loss": 0.5022, "step": 8848 }, { - "epoch": 0.93, - "grad_norm": 2.275882847499862, - "learning_rate": 1.237134619244823e-07, - "loss": 0.6165, + "epoch": 0.63, + "grad_norm": 1.7402150835335857, + "learning_rate": 3.2116117096148658e-06, + "loss": 0.4739, "step": 8849 }, { - "epoch": 0.93, - "grad_norm": 2.5971023387508803, - "learning_rate": 1.2333698447876296e-07, - "loss": 0.6973, + "epoch": 0.63, + "grad_norm": 1.8809436669449568, + "learning_rate": 3.2105386129041267e-06, + "loss": 0.5313, "step": 8850 }, { - "epoch": 0.93, - "grad_norm": 2.306532467495735, - "learning_rate": 1.229610735896558e-07, - "loss": 0.5684, + "epoch": 0.63, + "grad_norm": 1.9217937425837572, + "learning_rate": 3.2094656107188005e-06, + "loss": 0.4891, "step": 8851 }, { - "epoch": 0.93, - "grad_norm": 2.5310472966419377, - "learning_rate": 1.2258572930083313e-07, - "loss": 0.7, + "epoch": 0.63, + "grad_norm": 1.9173151380642097, + "learning_rate": 3.2083927031155683e-06, + "loss": 0.56, "step": 8852 }, { - "epoch": 0.93, - "grad_norm": 2.187034442784481, - "learning_rate": 1.222109516559006e-07, - "loss": 0.5667, + "epoch": 0.63, + "grad_norm": 1.7041845533055937, + "learning_rate": 3.2073198901511028e-06, + "loss": 0.4731, "step": 8853 }, { - "epoch": 0.93, - "grad_norm": 2.2888506566786493, - "learning_rate": 1.2183674069840057e-07, - "loss": 0.6023, + "epoch": 0.63, + "grad_norm": 1.519594752880686, + "learning_rate": 3.206247171882077e-06, + "loss": 0.5479, "step": 8854 }, { - "epoch": 0.93, - "grad_norm": 1.9527027435099331, - "learning_rate": 1.2146309647180554e-07, - "loss": 0.4499, + "epoch": 0.63, + "grad_norm": 1.5649561305130497, + "learning_rate": 3.205174548365154e-06, + "loss": 0.517, "step": 8855 }, { - "epoch": 0.93, - "grad_norm": 2.730891050434837, - "learning_rate": 1.2109001901952633e-07, - "loss": 0.6274, + "epoch": 0.63, + "grad_norm": 3.7613930744223207, + "learning_rate": 3.2041020196569907e-06, + "loss": 0.4536, "step": 8856 }, { - "epoch": 0.93, - "grad_norm": 0.916048809399537, - "learning_rate": 1.2071750838490492e-07, - "loss": 0.5672, + "epoch": 0.63, + "grad_norm": 1.8289240718468172, + "learning_rate": 3.2030295858142457e-06, + "loss": 0.5744, "step": 8857 }, { - "epoch": 0.93, - "grad_norm": 2.2808207217906893, - "learning_rate": 1.2034556461121894e-07, - "loss": 0.6706, + "epoch": 0.63, + "grad_norm": 1.8977605637114223, + "learning_rate": 3.201957246893568e-06, + "loss": 0.5428, "step": 8858 }, { - "epoch": 0.93, - "grad_norm": 2.48748766003941, - "learning_rate": 1.1997418774167934e-07, - "loss": 0.6039, + "epoch": 0.63, + "grad_norm": 1.6572344599573299, + "learning_rate": 3.2008850029515993e-06, + "loss": 0.503, "step": 8859 }, { - "epoch": 0.93, - "grad_norm": 2.37457209874063, - "learning_rate": 1.196033778194322e-07, - "loss": 0.6669, + "epoch": 0.63, + "grad_norm": 0.703639281063879, + "learning_rate": 3.1998128540449814e-06, + "loss": 0.4451, "step": 8860 }, { - "epoch": 0.93, - "grad_norm": 2.1767809049061233, - "learning_rate": 1.1923313488755638e-07, - "loss": 0.5488, + "epoch": 0.63, + "grad_norm": 2.048165093412023, + "learning_rate": 3.198740800230348e-06, + "loss": 0.54, "step": 8861 }, { - "epoch": 0.93, - "grad_norm": 2.2318763739789023, - "learning_rate": 1.1886345898906693e-07, - "loss": 0.6166, + "epoch": 0.63, + "grad_norm": 4.119196361087267, + "learning_rate": 3.1976688415643296e-06, + "loss": 0.6413, "step": 8862 }, { - "epoch": 0.93, - "grad_norm": 2.8605686514104973, - "learning_rate": 1.1849435016691003e-07, - "loss": 0.606, + "epoch": 0.63, + "grad_norm": 2.069504954444917, + "learning_rate": 3.196596978103551e-06, + "loss": 0.4928, "step": 8863 }, { - "epoch": 0.93, - "grad_norm": 2.908693940117577, - "learning_rate": 1.1812580846396915e-07, - "loss": 0.5751, + "epoch": 0.63, + "grad_norm": 1.6724020509815019, + "learning_rate": 3.1955252099046287e-06, + "loss": 0.4887, "step": 8864 }, { - "epoch": 0.93, - "grad_norm": 3.700097097894084, - "learning_rate": 1.1775783392305895e-07, - "loss": 0.6018, + "epoch": 0.63, + "grad_norm": 2.3532014702392527, + "learning_rate": 3.1944535370241815e-06, + "loss": 0.5333, "step": 8865 }, { - "epoch": 0.93, - "grad_norm": 2.782107805001388, - "learning_rate": 1.1739042658693079e-07, - "loss": 0.5801, + "epoch": 0.63, + "grad_norm": 1.6817071004016269, + "learning_rate": 3.193381959518814e-06, + "loss": 0.5162, "step": 8866 }, { - "epoch": 0.93, - "grad_norm": 2.6177299400948395, - "learning_rate": 1.1702358649826939e-07, - "loss": 0.6479, + "epoch": 0.63, + "grad_norm": 1.8671215652671114, + "learning_rate": 3.192310477445136e-06, + "loss": 0.5097, "step": 8867 }, { - "epoch": 0.93, - "grad_norm": 2.350114941297819, - "learning_rate": 1.166573136996918e-07, - "loss": 0.5391, + "epoch": 0.63, + "grad_norm": 1.9615373145463573, + "learning_rate": 3.1912390908597407e-06, + "loss": 0.4868, "step": 8868 }, { - "epoch": 0.93, - "grad_norm": 2.5754262051694763, - "learning_rate": 1.1629160823375118e-07, - "loss": 0.6068, + "epoch": 0.63, + "grad_norm": 2.560988701924626, + "learning_rate": 3.1901677998192276e-06, + "loss": 0.5307, "step": 8869 }, { - "epoch": 0.93, - "grad_norm": 3.382289379949096, - "learning_rate": 1.1592647014293412e-07, - "loss": 0.6544, + "epoch": 0.63, + "grad_norm": 1.6477806114724451, + "learning_rate": 3.189096604380184e-06, + "loss": 0.5499, "step": 8870 }, { - "epoch": 0.93, - "grad_norm": 2.3588925482663523, - "learning_rate": 1.1556189946966168e-07, - "loss": 0.5807, + "epoch": 0.63, + "grad_norm": 1.6299894975318883, + "learning_rate": 3.188025504599194e-06, + "loss": 0.5225, "step": 8871 }, { - "epoch": 0.93, - "grad_norm": 2.582600577577335, - "learning_rate": 1.151978962562883e-07, - "loss": 0.6165, + "epoch": 0.63, + "grad_norm": 1.7495043542644595, + "learning_rate": 3.1869545005328352e-06, + "loss": 0.4393, "step": 8872 }, { - "epoch": 0.93, - "grad_norm": 2.245726951542925, - "learning_rate": 1.1483446054510294e-07, - "loss": 0.5569, + "epoch": 0.63, + "grad_norm": 2.334187180576593, + "learning_rate": 3.1858835922376843e-06, + "loss": 0.5552, "step": 8873 }, { - "epoch": 0.93, - "grad_norm": 2.638961684798639, - "learning_rate": 1.144715923783274e-07, - "loss": 0.6425, + "epoch": 0.63, + "grad_norm": 1.88676101440517, + "learning_rate": 3.184812779770308e-06, + "loss": 0.5329, "step": 8874 }, { - "epoch": 0.93, - "grad_norm": 4.337475255641479, - "learning_rate": 1.1410929179812069e-07, - "loss": 0.6021, + "epoch": 0.63, + "grad_norm": 1.843975804988695, + "learning_rate": 3.1837420631872704e-06, + "loss": 0.5103, "step": 8875 }, { - "epoch": 0.93, - "grad_norm": 2.6499250104828334, - "learning_rate": 1.1374755884657195e-07, - "loss": 0.589, + "epoch": 0.63, + "grad_norm": 1.538061725992317, + "learning_rate": 3.1826714425451343e-06, + "loss": 0.4815, "step": 8876 }, { - "epoch": 0.93, - "grad_norm": 3.7819394045598353, - "learning_rate": 1.1338639356570758e-07, - "loss": 0.6157, + "epoch": 0.63, + "grad_norm": 2.0041269282416097, + "learning_rate": 3.1816009179004502e-06, + "loss": 0.563, "step": 8877 }, { - "epoch": 0.93, - "grad_norm": 3.9903219653791866, - "learning_rate": 1.130257959974862e-07, - "loss": 0.566, + "epoch": 0.63, + "grad_norm": 1.638735922471186, + "learning_rate": 3.1805304893097666e-06, + "loss": 0.5191, "step": 8878 }, { - "epoch": 0.93, - "grad_norm": 0.9474747256626247, - "learning_rate": 1.1266576618380098e-07, - "loss": 0.5611, + "epoch": 0.63, + "grad_norm": 0.6791779937912032, + "learning_rate": 3.179460156829626e-06, + "loss": 0.4364, "step": 8879 }, { - "epoch": 0.93, - "grad_norm": 2.8711918810126424, - "learning_rate": 1.1230630416647958e-07, - "loss": 0.5494, + "epoch": 0.63, + "grad_norm": 1.9983680865835123, + "learning_rate": 3.1783899205165713e-06, + "loss": 0.5043, "step": 8880 }, { - "epoch": 0.93, - "grad_norm": 2.991121349490518, - "learning_rate": 1.1194740998728193e-07, - "loss": 0.6804, + "epoch": 0.63, + "grad_norm": 1.9992011112974084, + "learning_rate": 3.1773197804271314e-06, + "loss": 0.5856, "step": 8881 }, { - "epoch": 0.93, - "grad_norm": 3.1006023896574195, - "learning_rate": 1.1158908368790523e-07, - "loss": 0.7024, + "epoch": 0.63, + "grad_norm": 1.6819113025305745, + "learning_rate": 3.176249736617839e-06, + "loss": 0.5095, "step": 8882 }, { - "epoch": 0.93, - "grad_norm": 2.1579700060562166, - "learning_rate": 1.1123132530997727e-07, - "loss": 0.6184, + "epoch": 0.63, + "grad_norm": 1.665053879346903, + "learning_rate": 3.1751797891452123e-06, + "loss": 0.518, "step": 8883 }, { - "epoch": 0.93, - "grad_norm": 2.466363894718886, - "learning_rate": 1.1087413489506205e-07, - "loss": 0.6905, + "epoch": 0.63, + "grad_norm": 3.2013840883739455, + "learning_rate": 3.1741099380657747e-06, + "loss": 0.5234, "step": 8884 }, { - "epoch": 0.93, - "grad_norm": 3.6365001531373107, - "learning_rate": 1.1051751248465691e-07, - "loss": 0.5644, + "epoch": 0.63, + "grad_norm": 1.620405058088708, + "learning_rate": 3.1730401834360374e-06, + "loss": 0.4847, "step": 8885 }, { - "epoch": 0.94, - "grad_norm": 3.007115123145037, - "learning_rate": 1.1016145812019319e-07, - "loss": 0.6451, + "epoch": 0.63, + "grad_norm": 0.7094216013713207, + "learning_rate": 3.1719705253125077e-06, + "loss": 0.4432, "step": 8886 }, { - "epoch": 0.94, - "grad_norm": 2.339803693912085, - "learning_rate": 1.098059718430361e-07, - "loss": 0.6171, + "epoch": 0.63, + "grad_norm": 1.951322121397613, + "learning_rate": 3.1709009637516873e-06, + "loss": 0.5012, "step": 8887 }, { - "epoch": 0.94, - "grad_norm": 4.514752409454256, - "learning_rate": 1.0945105369448483e-07, - "loss": 0.6357, + "epoch": 0.63, + "grad_norm": 1.9668804443441663, + "learning_rate": 3.169831498810079e-06, + "loss": 0.5914, "step": 8888 }, { - "epoch": 0.94, - "grad_norm": 3.1616055453161453, - "learning_rate": 1.0909670371577308e-07, - "loss": 0.6318, + "epoch": 0.63, + "grad_norm": 2.266091473315016, + "learning_rate": 3.1687621305441697e-06, + "loss": 0.5139, "step": 8889 }, { - "epoch": 0.94, - "grad_norm": 2.3220638444142474, - "learning_rate": 1.087429219480679e-07, - "loss": 0.6093, + "epoch": 0.63, + "grad_norm": 1.550962411101854, + "learning_rate": 3.1676928590104496e-06, + "loss": 0.5016, "step": 8890 }, { - "epoch": 0.94, - "grad_norm": 2.3976532258477015, - "learning_rate": 1.0838970843247143e-07, - "loss": 0.6613, + "epoch": 0.63, + "grad_norm": 1.6751889401452973, + "learning_rate": 3.166623684265403e-06, + "loss": 0.5255, "step": 8891 }, { - "epoch": 0.94, - "grad_norm": 2.661253383224113, - "learning_rate": 1.0803706321001805e-07, - "loss": 0.6697, + "epoch": 0.63, + "grad_norm": 1.5534663213507676, + "learning_rate": 3.165554606365506e-06, + "loss": 0.4674, "step": 8892 }, { - "epoch": 0.94, - "grad_norm": 2.6683239924659907, - "learning_rate": 1.0768498632167779e-07, - "loss": 0.6107, + "epoch": 0.63, + "grad_norm": 1.842189164651779, + "learning_rate": 3.16448562536723e-06, + "loss": 0.5953, "step": 8893 }, { - "epoch": 0.94, - "grad_norm": 2.5718421574140513, - "learning_rate": 1.0733347780835346e-07, - "loss": 0.5767, + "epoch": 0.63, + "grad_norm": 1.9416804793643982, + "learning_rate": 3.1634167413270412e-06, + "loss": 0.5485, "step": 8894 }, { - "epoch": 0.94, - "grad_norm": 2.146294517128654, - "learning_rate": 1.0698253771088241e-07, - "loss": 0.591, + "epoch": 0.63, + "grad_norm": 1.7330105137932992, + "learning_rate": 3.1623479543014055e-06, + "loss": 0.4611, "step": 8895 }, { - "epoch": 0.94, - "grad_norm": 2.2452315585617613, - "learning_rate": 1.0663216607003535e-07, - "loss": 0.5671, + "epoch": 0.63, + "grad_norm": 1.5590603585473128, + "learning_rate": 3.1612792643467773e-06, + "loss": 0.5889, "step": 8896 }, { - "epoch": 0.94, - "grad_norm": 2.3863559337974825, - "learning_rate": 1.0628236292651861e-07, - "loss": 0.5705, + "epoch": 0.63, + "grad_norm": 0.7665545622398862, + "learning_rate": 3.160210671519609e-06, + "loss": 0.4403, "step": 8897 }, { - "epoch": 0.94, - "grad_norm": 2.6440910292337767, - "learning_rate": 1.0593312832097025e-07, - "loss": 0.5802, + "epoch": 0.63, + "grad_norm": 1.500678656679247, + "learning_rate": 3.159142175876345e-06, + "loss": 0.4917, "step": 8898 }, { - "epoch": 0.94, - "grad_norm": 0.9645714063530803, - "learning_rate": 1.055844622939639e-07, - "loss": 0.5883, + "epoch": 0.63, + "grad_norm": 1.6802750171088427, + "learning_rate": 3.1580737774734312e-06, + "loss": 0.529, "step": 8899 }, { - "epoch": 0.94, - "grad_norm": 2.58708264588437, - "learning_rate": 1.0523636488600664e-07, - "loss": 0.5565, + "epoch": 0.63, + "grad_norm": 1.8622255711741527, + "learning_rate": 3.1570054763673014e-06, + "loss": 0.4951, "step": 8900 }, { - "epoch": 0.94, - "grad_norm": 2.265603984151188, - "learning_rate": 1.048888361375383e-07, - "loss": 0.6022, + "epoch": 0.63, + "grad_norm": 1.7471254122887967, + "learning_rate": 3.155937272614388e-06, + "loss": 0.4983, "step": 8901 }, { - "epoch": 0.94, - "grad_norm": 2.357562605848145, - "learning_rate": 1.045418760889355e-07, - "loss": 0.5128, + "epoch": 0.63, + "grad_norm": 1.769529339941457, + "learning_rate": 3.154869166271114e-06, + "loss": 0.5864, "step": 8902 }, { - "epoch": 0.94, - "grad_norm": 3.4292025931619565, - "learning_rate": 1.0419548478050601e-07, - "loss": 0.5251, + "epoch": 0.63, + "grad_norm": 1.7349614858583104, + "learning_rate": 3.153801157393903e-06, + "loss": 0.5808, "step": 8903 }, { - "epoch": 0.94, - "grad_norm": 2.0323838830808834, - "learning_rate": 1.038496622524926e-07, - "loss": 0.5835, + "epoch": 0.63, + "grad_norm": 2.0771345307694147, + "learning_rate": 3.1527332460391723e-06, + "loss": 0.5127, "step": 8904 }, { - "epoch": 0.94, - "grad_norm": 4.6168280543265166, - "learning_rate": 1.0350440854507205e-07, - "loss": 0.636, + "epoch": 0.63, + "grad_norm": 1.6641947466980234, + "learning_rate": 3.1516654322633296e-06, + "loss": 0.5366, "step": 8905 }, { - "epoch": 0.94, - "grad_norm": 2.052977096577669, - "learning_rate": 1.0315972369835559e-07, - "loss": 0.6365, + "epoch": 0.63, + "grad_norm": 1.806735590087179, + "learning_rate": 3.150597716122783e-06, + "loss": 0.442, "step": 8906 }, { - "epoch": 0.94, - "grad_norm": 2.2130917241762083, - "learning_rate": 1.0281560775238619e-07, - "loss": 0.6581, + "epoch": 0.63, + "grad_norm": 1.527521586032973, + "learning_rate": 3.149530097673932e-06, + "loss": 0.4992, "step": 8907 }, { - "epoch": 0.94, - "grad_norm": 3.7050724972147653, - "learning_rate": 1.0247206074714411e-07, - "loss": 0.6105, + "epoch": 0.63, + "grad_norm": 2.318084612396937, + "learning_rate": 3.148462576973171e-06, + "loss": 0.6277, "step": 8908 }, { - "epoch": 0.94, - "grad_norm": 5.090324313422908, - "learning_rate": 1.0212908272253963e-07, - "loss": 0.6013, + "epoch": 0.63, + "grad_norm": 1.7658537467646718, + "learning_rate": 3.147395154076889e-06, + "loss": 0.5158, "step": 8909 }, { - "epoch": 0.94, - "grad_norm": 3.5235691542628236, - "learning_rate": 1.0178667371842088e-07, - "loss": 0.6415, + "epoch": 0.63, + "grad_norm": 1.5660830161559902, + "learning_rate": 3.1463278290414744e-06, + "loss": 0.5913, "step": 8910 }, { - "epoch": 0.94, - "grad_norm": 2.4523022912907857, - "learning_rate": 1.014448337745666e-07, - "loss": 0.6337, + "epoch": 0.63, + "grad_norm": 1.7433083625846246, + "learning_rate": 3.1452606019233043e-06, + "loss": 0.5596, "step": 8911 }, { - "epoch": 0.94, - "grad_norm": 2.318885308337774, - "learning_rate": 1.0110356293069168e-07, - "loss": 0.6198, + "epoch": 0.63, + "grad_norm": 1.6464210485947475, + "learning_rate": 3.144193472778755e-06, + "loss": 0.5145, "step": 8912 }, { - "epoch": 0.94, - "grad_norm": 2.0757919001418195, - "learning_rate": 1.0076286122644274e-07, - "loss": 0.6183, + "epoch": 0.63, + "grad_norm": 1.7672273031569048, + "learning_rate": 3.1431264416641917e-06, + "loss": 0.5041, "step": 8913 }, { - "epoch": 0.94, - "grad_norm": 2.5362575196035597, - "learning_rate": 1.0042272870140258e-07, - "loss": 0.6237, + "epoch": 0.63, + "grad_norm": 1.927528887344882, + "learning_rate": 3.1420595086359846e-06, + "loss": 0.5102, "step": 8914 }, { - "epoch": 0.94, - "grad_norm": 2.31713074091154, - "learning_rate": 1.0008316539508733e-07, - "loss": 0.6719, + "epoch": 0.63, + "grad_norm": 1.8530578782897877, + "learning_rate": 3.140992673750489e-06, + "loss": 0.5448, "step": 8915 }, { - "epoch": 0.94, - "grad_norm": 2.767274831301876, - "learning_rate": 9.974417134694491e-08, - "loss": 0.5058, + "epoch": 0.63, + "grad_norm": 2.44020890865194, + "learning_rate": 3.1399259370640613e-06, + "loss": 0.5214, "step": 8916 }, { - "epoch": 0.94, - "grad_norm": 0.9939390220324551, - "learning_rate": 9.940574659635993e-08, - "loss": 0.5415, + "epoch": 0.63, + "grad_norm": 2.3836434513752036, + "learning_rate": 3.1388592986330446e-06, + "loss": 0.5054, "step": 8917 }, { - "epoch": 0.94, - "grad_norm": 3.3485962773421636, - "learning_rate": 9.90678911826487e-08, - "loss": 0.5924, + "epoch": 0.63, + "grad_norm": 1.579434860162173, + "learning_rate": 3.1377927585137878e-06, + "loss": 0.4978, "step": 8918 }, { - "epoch": 0.94, - "grad_norm": 2.8816111251299037, - "learning_rate": 9.873060514506316e-08, - "loss": 0.6027, + "epoch": 0.63, + "grad_norm": 1.690735797584824, + "learning_rate": 3.1367263167626284e-06, + "loss": 0.5, "step": 8919 }, { - "epoch": 0.94, - "grad_norm": 1.0275962573135506, - "learning_rate": 9.839388852278752e-08, - "loss": 0.5166, + "epoch": 0.63, + "grad_norm": 0.8695682063603708, + "learning_rate": 3.1356599734358966e-06, + "loss": 0.4106, "step": 8920 }, { - "epoch": 0.94, - "grad_norm": 2.5666272152464558, - "learning_rate": 9.805774135494106e-08, - "loss": 0.6445, + "epoch": 0.63, + "grad_norm": 0.722244603056578, + "learning_rate": 3.1345937285899243e-06, + "loss": 0.4486, "step": 8921 }, { - "epoch": 0.94, - "grad_norm": 2.4748907614724827, - "learning_rate": 9.772216368057586e-08, - "loss": 0.6334, + "epoch": 0.63, + "grad_norm": 1.9619560491423496, + "learning_rate": 3.1335275822810324e-06, + "loss": 0.6416, "step": 8922 }, { - "epoch": 0.94, - "grad_norm": 2.942060956680936, - "learning_rate": 9.738715553867851e-08, - "loss": 0.6156, + "epoch": 0.63, + "grad_norm": 1.710842740818859, + "learning_rate": 3.1324615345655373e-06, + "loss": 0.5248, "step": 8923 }, { - "epoch": 0.94, - "grad_norm": 3.9951301127561973, - "learning_rate": 9.705271696816954e-08, - "loss": 0.5977, + "epoch": 0.63, + "grad_norm": 1.6252746356188683, + "learning_rate": 3.1313955854997513e-06, + "loss": 0.5121, "step": 8924 }, { - "epoch": 0.94, - "grad_norm": 2.777453454552441, - "learning_rate": 9.671884800790288e-08, - "loss": 0.6252, + "epoch": 0.63, + "grad_norm": 1.687793358503084, + "learning_rate": 3.130329735139983e-06, + "loss": 0.5735, "step": 8925 }, { - "epoch": 0.94, - "grad_norm": 2.348283390971774, - "learning_rate": 9.638554869666695e-08, - "loss": 0.6118, + "epoch": 0.63, + "grad_norm": 1.7429534324367386, + "learning_rate": 3.1292639835425332e-06, + "loss": 0.5528, "step": 8926 }, { - "epoch": 0.94, - "grad_norm": 2.4067385375615493, - "learning_rate": 9.605281907318243e-08, - "loss": 0.6271, + "epoch": 0.63, + "grad_norm": 0.7702870025998363, + "learning_rate": 3.128198330763699e-06, + "loss": 0.4394, "step": 8927 }, { - "epoch": 0.94, - "grad_norm": 0.9552504322728489, - "learning_rate": 9.572065917610618e-08, - "loss": 0.5533, + "epoch": 0.63, + "grad_norm": 0.6812226705363837, + "learning_rate": 3.1271327768597698e-06, + "loss": 0.426, "step": 8928 }, { - "epoch": 0.94, - "grad_norm": 2.271928583094003, - "learning_rate": 9.538906904402623e-08, - "loss": 0.6035, + "epoch": 0.63, + "grad_norm": 2.2456042845526794, + "learning_rate": 3.1260673218870353e-06, + "loss": 0.6127, "step": 8929 }, { - "epoch": 0.94, - "grad_norm": 3.3223263454773297, - "learning_rate": 9.505804871546731e-08, - "loss": 0.6097, + "epoch": 0.63, + "grad_norm": 0.7303013344599943, + "learning_rate": 3.125001965901775e-06, + "loss": 0.4396, "step": 8930 }, { - "epoch": 0.94, - "grad_norm": 4.923685715911361, - "learning_rate": 9.472759822888478e-08, - "loss": 0.576, + "epoch": 0.63, + "grad_norm": 1.7340934159895796, + "learning_rate": 3.123936708960263e-06, + "loss": 0.5415, "step": 8931 }, { - "epoch": 0.94, - "grad_norm": 2.249066974617624, - "learning_rate": 9.439771762267069e-08, - "loss": 0.6664, + "epoch": 0.63, + "grad_norm": 0.662558498377235, + "learning_rate": 3.1228715511187714e-06, + "loss": 0.4002, "step": 8932 }, { - "epoch": 0.94, - "grad_norm": 2.2985171963927464, - "learning_rate": 9.40684069351483e-08, - "loss": 0.6363, + "epoch": 0.63, + "grad_norm": 1.5364259541301655, + "learning_rate": 3.1218064924335646e-06, + "loss": 0.5262, "step": 8933 }, { - "epoch": 0.94, - "grad_norm": 2.1586975547776857, - "learning_rate": 9.373966620457753e-08, - "loss": 0.5921, + "epoch": 0.63, + "grad_norm": 1.8361651620544046, + "learning_rate": 3.1207415329609037e-06, + "loss": 0.5593, "step": 8934 }, { - "epoch": 0.94, - "grad_norm": 2.699332277329513, - "learning_rate": 9.341149546914951e-08, - "loss": 0.6785, + "epoch": 0.63, + "grad_norm": 1.7229413234691433, + "learning_rate": 3.1196766727570434e-06, + "loss": 0.5017, "step": 8935 }, { - "epoch": 0.94, - "grad_norm": 2.705681753410133, - "learning_rate": 9.308389476699043e-08, - "loss": 0.6113, + "epoch": 0.63, + "grad_norm": 1.643490426496726, + "learning_rate": 3.1186119118782302e-06, + "loss": 0.5033, "step": 8936 }, { - "epoch": 0.94, - "grad_norm": 2.299817850591371, - "learning_rate": 9.27568641361598e-08, - "loss": 0.6317, + "epoch": 0.63, + "grad_norm": 1.8508625392548246, + "learning_rate": 3.1175472503807137e-06, + "loss": 0.4603, "step": 8937 }, { - "epoch": 0.94, - "grad_norm": 2.6726846572437575, - "learning_rate": 9.243040361465172e-08, - "loss": 0.5638, + "epoch": 0.63, + "grad_norm": 0.7552256825313363, + "learning_rate": 3.116482688320729e-06, + "loss": 0.4485, "step": 8938 }, { - "epoch": 0.94, - "grad_norm": 2.0645519294317953, - "learning_rate": 9.210451324039304e-08, - "loss": 0.6308, + "epoch": 0.63, + "grad_norm": 2.4254989796420166, + "learning_rate": 3.1154182257545096e-06, + "loss": 0.5346, "step": 8939 }, { - "epoch": 0.94, - "grad_norm": 2.4527040374452262, - "learning_rate": 9.177919305124405e-08, - "loss": 0.6753, + "epoch": 0.63, + "grad_norm": 2.0161091641160183, + "learning_rate": 3.1143538627382874e-06, + "loss": 0.4816, "step": 8940 }, { - "epoch": 0.94, - "grad_norm": 0.9497404981411139, - "learning_rate": 9.145444308500117e-08, - "loss": 0.5437, + "epoch": 0.63, + "grad_norm": 1.6689575397794592, + "learning_rate": 3.113289599328282e-06, + "loss": 0.5671, "step": 8941 }, { - "epoch": 0.94, - "grad_norm": 5.86280566237663, - "learning_rate": 9.11302633793909e-08, - "loss": 0.5582, + "epoch": 0.63, + "grad_norm": 1.4895506104009335, + "learning_rate": 3.112225435580715e-06, + "loss": 0.4922, "step": 8942 }, { - "epoch": 0.94, - "grad_norm": 2.231382224332527, - "learning_rate": 9.080665397207755e-08, - "loss": 0.5663, + "epoch": 0.63, + "grad_norm": 2.187744079640872, + "learning_rate": 3.1111613715517942e-06, + "loss": 0.538, "step": 8943 }, { - "epoch": 0.94, - "grad_norm": 2.383310389908959, - "learning_rate": 9.048361490065549e-08, - "loss": 0.5818, + "epoch": 0.63, + "grad_norm": 2.5462907169642666, + "learning_rate": 3.1100974072977318e-06, + "loss": 0.5156, "step": 8944 }, { - "epoch": 0.94, - "grad_norm": 2.365476329509127, - "learning_rate": 9.016114620265526e-08, - "loss": 0.6667, + "epoch": 0.63, + "grad_norm": 1.845530126571093, + "learning_rate": 3.1090335428747285e-06, + "loss": 0.6331, "step": 8945 }, { - "epoch": 0.94, - "grad_norm": 2.6118574377559023, - "learning_rate": 8.983924791553966e-08, - "loss": 0.7023, + "epoch": 0.63, + "grad_norm": 1.4531816171061094, + "learning_rate": 3.1079697783389805e-06, + "loss": 0.5085, "step": 8946 }, { - "epoch": 0.94, - "grad_norm": 3.2390121343870013, - "learning_rate": 8.951792007670713e-08, - "loss": 0.5439, + "epoch": 0.63, + "grad_norm": 1.7627754213614966, + "learning_rate": 3.1069061137466793e-06, + "loss": 0.523, "step": 8947 }, { - "epoch": 0.94, - "grad_norm": 2.824711199639862, - "learning_rate": 8.919716272348722e-08, - "loss": 0.6532, + "epoch": 0.63, + "grad_norm": 1.6940541628125119, + "learning_rate": 3.1058425491540122e-06, + "loss": 0.601, "step": 8948 }, { - "epoch": 0.94, - "grad_norm": 2.7642587541543717, - "learning_rate": 8.88769758931457e-08, - "loss": 0.5559, + "epoch": 0.64, + "grad_norm": 1.6252235166048519, + "learning_rate": 3.104779084617161e-06, + "loss": 0.5619, "step": 8949 }, { - "epoch": 0.94, - "grad_norm": 2.7721849377139645, - "learning_rate": 8.855735962288059e-08, - "loss": 0.6408, + "epoch": 0.64, + "grad_norm": 1.4251883678560178, + "learning_rate": 3.103715720192301e-06, + "loss": 0.5169, "step": 8950 }, { - "epoch": 0.94, - "grad_norm": 12.581276150709563, - "learning_rate": 8.823831394982329e-08, - "loss": 0.5789, + "epoch": 0.64, + "grad_norm": 1.7181893959908747, + "learning_rate": 3.1026524559355987e-06, + "loss": 0.5178, "step": 8951 }, { - "epoch": 0.94, - "grad_norm": 2.6512425220755578, - "learning_rate": 8.791983891104084e-08, - "loss": 0.557, + "epoch": 0.64, + "grad_norm": 1.9390467742937576, + "learning_rate": 3.1015892919032254e-06, + "loss": 0.4664, "step": 8952 }, { - "epoch": 0.94, - "grad_norm": 2.4054321801371494, - "learning_rate": 8.760193454353194e-08, - "loss": 0.6467, + "epoch": 0.64, + "grad_norm": 1.9509674180105803, + "learning_rate": 3.1005262281513386e-06, + "loss": 0.5182, "step": 8953 }, { - "epoch": 0.94, - "grad_norm": 2.9064106195783275, - "learning_rate": 8.728460088422985e-08, - "loss": 0.5413, + "epoch": 0.64, + "grad_norm": 0.7384615622694864, + "learning_rate": 3.099463264736091e-06, + "loss": 0.4156, "step": 8954 }, { - "epoch": 0.94, - "grad_norm": 2.5100181773055414, - "learning_rate": 8.696783797000174e-08, - "loss": 0.6023, + "epoch": 0.64, + "grad_norm": 0.7070443343052686, + "learning_rate": 3.0984004017136337e-06, + "loss": 0.4349, "step": 8955 }, { - "epoch": 0.94, - "grad_norm": 0.9356433470805314, - "learning_rate": 8.665164583764818e-08, - "loss": 0.5343, + "epoch": 0.64, + "grad_norm": 1.8771750034289651, + "learning_rate": 3.0973376391401126e-06, + "loss": 0.541, "step": 8956 }, { - "epoch": 0.94, - "grad_norm": 2.9024523813801144, - "learning_rate": 8.633602452390311e-08, - "loss": 0.6028, + "epoch": 0.64, + "grad_norm": 2.57841370124129, + "learning_rate": 3.096274977071664e-06, + "loss": 0.5679, "step": 8957 }, { - "epoch": 0.94, - "grad_norm": 2.9298897593318474, - "learning_rate": 8.602097406543442e-08, - "loss": 0.6587, + "epoch": 0.64, + "grad_norm": 1.6884188962354933, + "learning_rate": 3.0952124155644203e-06, + "loss": 0.5035, "step": 8958 }, { - "epoch": 0.94, - "grad_norm": 2.6001966117983955, - "learning_rate": 8.570649449884505e-08, - "loss": 0.6481, + "epoch": 0.64, + "grad_norm": 1.6587398979679187, + "learning_rate": 3.0941499546745126e-06, + "loss": 0.5841, "step": 8959 }, { - "epoch": 0.94, - "grad_norm": 2.45160147679471, - "learning_rate": 8.539258586066912e-08, - "loss": 0.5524, + "epoch": 0.64, + "grad_norm": 1.7775281231798017, + "learning_rate": 3.0930875944580625e-06, + "loss": 0.535, "step": 8960 }, { - "epoch": 0.94, - "grad_norm": 4.5237192425119686, - "learning_rate": 8.507924818737523e-08, - "loss": 0.7146, + "epoch": 0.64, + "grad_norm": 1.7811868407511644, + "learning_rate": 3.092025334971186e-06, + "loss": 0.5115, "step": 8961 }, { - "epoch": 0.94, - "grad_norm": 2.5557536371206893, - "learning_rate": 8.476648151536704e-08, - "loss": 0.6013, + "epoch": 0.64, + "grad_norm": 1.653756080545026, + "learning_rate": 3.0909631762699964e-06, + "loss": 0.5201, "step": 8962 }, { - "epoch": 0.94, - "grad_norm": 3.195273209399613, - "learning_rate": 8.445428588098048e-08, - "loss": 0.6442, + "epoch": 0.64, + "grad_norm": 3.2755082440010823, + "learning_rate": 3.089901118410601e-06, + "loss": 0.5391, "step": 8963 }, { - "epoch": 0.94, - "grad_norm": 2.174929174236636, - "learning_rate": 8.414266132048543e-08, - "loss": 0.5563, + "epoch": 0.64, + "grad_norm": 2.2276009967103927, + "learning_rate": 3.0888391614491007e-06, + "loss": 0.4306, "step": 8964 }, { - "epoch": 0.94, - "grad_norm": 4.334437374273213, - "learning_rate": 8.383160787008627e-08, - "loss": 0.6087, + "epoch": 0.64, + "grad_norm": 2.3844859022038385, + "learning_rate": 3.0877773054415926e-06, + "loss": 0.5468, "step": 8965 }, { - "epoch": 0.94, - "grad_norm": 2.2982581898665733, - "learning_rate": 8.352112556591907e-08, - "loss": 0.5159, + "epoch": 0.64, + "grad_norm": 1.7211002851153443, + "learning_rate": 3.086715550444165e-06, + "loss": 0.5459, "step": 8966 }, { - "epoch": 0.94, - "grad_norm": 2.485884461295592, - "learning_rate": 8.321121444405611e-08, - "loss": 0.6474, + "epoch": 0.64, + "grad_norm": 1.7512123749301147, + "learning_rate": 3.085653896512907e-06, + "loss": 0.5151, "step": 8967 }, { - "epoch": 0.94, - "grad_norm": 0.9202366397635555, - "learning_rate": 8.29018745405008e-08, - "loss": 0.4943, + "epoch": 0.64, + "grad_norm": 1.9759594116036368, + "learning_rate": 3.084592343703896e-06, + "loss": 0.474, "step": 8968 }, { - "epoch": 0.94, - "grad_norm": 2.242007414517413, - "learning_rate": 8.259310589119162e-08, - "loss": 0.5891, + "epoch": 0.64, + "grad_norm": 2.6295821319593657, + "learning_rate": 3.0835308920732066e-06, + "loss": 0.5635, "step": 8969 }, { - "epoch": 0.94, - "grad_norm": 2.159210998912373, - "learning_rate": 8.2284908532001e-08, - "loss": 0.6245, + "epoch": 0.64, + "grad_norm": 1.5607213238082143, + "learning_rate": 3.0824695416769103e-06, + "loss": 0.5169, "step": 8970 }, { - "epoch": 0.94, - "grad_norm": 2.5880306782993467, - "learning_rate": 8.19772824987336e-08, - "loss": 0.5807, + "epoch": 0.64, + "grad_norm": 1.9165661191766092, + "learning_rate": 3.0814082925710707e-06, + "loss": 0.5446, "step": 8971 }, { - "epoch": 0.94, - "grad_norm": 0.9458064039539235, - "learning_rate": 8.167022782712919e-08, - "loss": 0.5854, + "epoch": 0.64, + "grad_norm": 1.5078281834408467, + "learning_rate": 3.0803471448117463e-06, + "loss": 0.4601, "step": 8972 }, { - "epoch": 0.94, - "grad_norm": 2.2865336633479743, - "learning_rate": 8.136374455286033e-08, - "loss": 0.6299, + "epoch": 0.64, + "grad_norm": 1.5565400254731439, + "learning_rate": 3.079286098454989e-06, + "loss": 0.5055, "step": 8973 }, { - "epoch": 0.94, - "grad_norm": 0.9344831427818824, - "learning_rate": 8.105783271153356e-08, - "loss": 0.5415, + "epoch": 0.64, + "grad_norm": 1.8914347168093333, + "learning_rate": 3.0782251535568497e-06, + "loss": 0.5559, "step": 8974 }, { - "epoch": 0.94, - "grad_norm": 3.63891046584668, - "learning_rate": 8.075249233868821e-08, - "loss": 0.6369, + "epoch": 0.64, + "grad_norm": 2.6175778328971906, + "learning_rate": 3.0771643101733693e-06, + "loss": 0.4821, "step": 8975 }, { - "epoch": 0.94, - "grad_norm": 1.0932455433990051, - "learning_rate": 8.044772346979812e-08, - "loss": 0.5487, + "epoch": 0.64, + "grad_norm": 1.9366468399465062, + "learning_rate": 3.0761035683605856e-06, + "loss": 0.5257, "step": 8976 }, { - "epoch": 0.94, - "grad_norm": 0.8253603754509732, - "learning_rate": 8.014352614027054e-08, - "loss": 0.531, + "epoch": 0.64, + "grad_norm": 1.5093825999652826, + "learning_rate": 3.07504292817453e-06, + "loss": 0.4941, "step": 8977 }, { - "epoch": 0.94, - "grad_norm": 2.3887697326667325, - "learning_rate": 7.983990038544664e-08, - "loss": 0.5317, + "epoch": 0.64, + "grad_norm": 2.0646063815978413, + "learning_rate": 3.073982389671229e-06, + "loss": 0.5383, "step": 8978 }, { - "epoch": 0.94, - "grad_norm": 0.954945688802483, - "learning_rate": 7.953684624059987e-08, - "loss": 0.5603, + "epoch": 0.64, + "grad_norm": 1.6709794191958434, + "learning_rate": 3.0729219529067054e-06, + "loss": 0.5181, "step": 8979 }, { - "epoch": 0.94, - "grad_norm": 2.949484917320116, - "learning_rate": 7.923436374093929e-08, - "loss": 0.5994, + "epoch": 0.64, + "grad_norm": 2.204822145819431, + "learning_rate": 3.0718616179369753e-06, + "loss": 0.5259, "step": 8980 }, { - "epoch": 0.95, - "grad_norm": 2.027629114424706, - "learning_rate": 7.893245292160511e-08, - "loss": 0.601, + "epoch": 0.64, + "grad_norm": 1.718157047057334, + "learning_rate": 3.070801384818045e-06, + "loss": 0.6182, "step": 8981 }, { - "epoch": 0.95, - "grad_norm": 2.3171055100942057, - "learning_rate": 7.863111381767374e-08, - "loss": 0.544, + "epoch": 0.64, + "grad_norm": 1.9887835947764605, + "learning_rate": 3.0697412536059247e-06, + "loss": 0.5476, "step": 8982 }, { - "epoch": 0.95, - "grad_norm": 2.5697202512374178, - "learning_rate": 7.833034646415272e-08, - "loss": 0.6016, + "epoch": 0.64, + "grad_norm": 2.070915319817309, + "learning_rate": 3.068681224356611e-06, + "loss": 0.5209, "step": 8983 }, { - "epoch": 0.95, - "grad_norm": 2.502909881767718, - "learning_rate": 7.80301508959852e-08, - "loss": 0.6382, + "epoch": 0.64, + "grad_norm": 1.7181723813366638, + "learning_rate": 3.0676212971261005e-06, + "loss": 0.5447, "step": 8984 }, { - "epoch": 0.95, - "grad_norm": 2.4985875211152675, - "learning_rate": 7.773052714804719e-08, - "loss": 0.5886, + "epoch": 0.64, + "grad_norm": 1.6827030494131703, + "learning_rate": 3.0665614719703784e-06, + "loss": 0.5518, "step": 8985 }, { - "epoch": 0.95, - "grad_norm": 2.9604096707667176, - "learning_rate": 7.743147525514749e-08, - "loss": 0.6592, + "epoch": 0.64, + "grad_norm": 1.831661068486478, + "learning_rate": 3.065501748945433e-06, + "loss": 0.51, "step": 8986 }, { - "epoch": 0.95, - "grad_norm": 2.074366972373142, - "learning_rate": 7.713299525202944e-08, - "loss": 0.5226, + "epoch": 0.64, + "grad_norm": 1.6337148593472135, + "learning_rate": 3.06444212810724e-06, + "loss": 0.5386, "step": 8987 }, { - "epoch": 0.95, - "grad_norm": 4.844749218296161, - "learning_rate": 7.683508717336918e-08, - "loss": 0.6071, + "epoch": 0.64, + "grad_norm": 1.5813227706191602, + "learning_rate": 3.0633826095117706e-06, + "loss": 0.5406, "step": 8988 }, { - "epoch": 0.95, - "grad_norm": 2.591141612749998, - "learning_rate": 7.653775105377737e-08, - "loss": 0.6346, + "epoch": 0.64, + "grad_norm": 2.1932359688386516, + "learning_rate": 3.0623231932149965e-06, + "loss": 0.5644, "step": 8989 }, { - "epoch": 0.95, - "grad_norm": 5.626446005912948, - "learning_rate": 7.62409869277969e-08, - "loss": 0.6454, + "epoch": 0.64, + "grad_norm": 1.7428902645358257, + "learning_rate": 3.061263879272876e-06, + "loss": 0.5291, "step": 8990 }, { - "epoch": 0.95, - "grad_norm": 2.5111955019235888, - "learning_rate": 7.59447948299058e-08, - "loss": 0.6752, + "epoch": 0.64, + "grad_norm": 1.7700574596017273, + "learning_rate": 3.0602046677413665e-06, + "loss": 0.565, "step": 8991 }, { - "epoch": 0.95, - "grad_norm": 3.0568359259106237, - "learning_rate": 7.564917479451373e-08, - "loss": 0.617, + "epoch": 0.64, + "grad_norm": 1.7164610893366252, + "learning_rate": 3.0591455586764184e-06, + "loss": 0.5253, "step": 8992 }, { - "epoch": 0.95, - "grad_norm": 2.22905556576431, - "learning_rate": 7.535412685596599e-08, - "loss": 0.658, + "epoch": 0.64, + "grad_norm": 1.5969970085297904, + "learning_rate": 3.0580865521339798e-06, + "loss": 0.5189, "step": 8993 }, { - "epoch": 0.95, - "grad_norm": 3.687646814338487, - "learning_rate": 7.505965104854073e-08, - "loss": 0.7067, + "epoch": 0.64, + "grad_norm": 1.7642723986050934, + "learning_rate": 3.0570276481699894e-06, + "loss": 0.4651, "step": 8994 }, { - "epoch": 0.95, - "grad_norm": 2.1712557178132945, - "learning_rate": 7.476574740644838e-08, - "loss": 0.5687, + "epoch": 0.64, + "grad_norm": 1.8216632960188424, + "learning_rate": 3.0559688468403832e-06, + "loss": 0.5742, "step": 8995 }, { - "epoch": 0.95, - "grad_norm": 3.390681967447077, - "learning_rate": 7.44724159638338e-08, - "loss": 0.6088, + "epoch": 0.64, + "grad_norm": 1.7593466046499908, + "learning_rate": 3.0549101482010875e-06, + "loss": 0.4593, "step": 8996 }, { - "epoch": 0.95, - "grad_norm": 1.0375781519569047, - "learning_rate": 7.417965675477534e-08, - "loss": 0.541, + "epoch": 0.64, + "grad_norm": 2.334780926670625, + "learning_rate": 3.0538515523080304e-06, + "loss": 0.5869, "step": 8997 }, { - "epoch": 0.95, - "grad_norm": 2.695143332239307, - "learning_rate": 7.388746981328632e-08, - "loss": 0.5657, + "epoch": 0.64, + "grad_norm": 1.7412048491676766, + "learning_rate": 3.0527930592171273e-06, + "loss": 0.52, "step": 8998 }, { - "epoch": 0.95, - "grad_norm": 3.5000885655908025, - "learning_rate": 7.359585517331014e-08, - "loss": 0.643, + "epoch": 0.64, + "grad_norm": 3.360786745051587, + "learning_rate": 3.0517346689842954e-06, + "loss": 0.5212, "step": 8999 }, { - "epoch": 0.95, - "grad_norm": 2.556164468290286, - "learning_rate": 7.330481286872749e-08, - "loss": 0.5475, + "epoch": 0.64, + "grad_norm": 1.9498373447263968, + "learning_rate": 3.050676381665436e-06, + "loss": 0.5335, "step": 9000 }, { - "epoch": 0.95, - "grad_norm": 0.8838512097484564, - "learning_rate": 7.301434293334908e-08, - "loss": 0.5353, + "epoch": 0.64, + "grad_norm": 3.0231406272000036, + "learning_rate": 3.0496181973164585e-06, + "loss": 0.5132, "step": 9001 }, { - "epoch": 0.95, - "grad_norm": 2.274766804514225, - "learning_rate": 7.272444540092294e-08, - "loss": 0.5828, + "epoch": 0.64, + "grad_norm": 1.6223038221992754, + "learning_rate": 3.0485601159932564e-06, + "loss": 0.4632, "step": 9002 }, { - "epoch": 0.95, - "grad_norm": 2.1596373340527326, - "learning_rate": 7.243512030512656e-08, - "loss": 0.6407, + "epoch": 0.64, + "grad_norm": 2.0774166598415933, + "learning_rate": 3.0475021377517194e-06, + "loss": 0.5308, "step": 9003 }, { - "epoch": 0.95, - "grad_norm": 2.3934408334922597, - "learning_rate": 7.214636767957417e-08, - "loss": 0.5951, + "epoch": 0.64, + "grad_norm": 1.804274296587741, + "learning_rate": 3.0464442626477375e-06, + "loss": 0.5249, "step": 9004 }, { - "epoch": 0.95, - "grad_norm": 2.1077351249376233, - "learning_rate": 7.18581875578117e-08, - "loss": 0.5951, + "epoch": 0.64, + "grad_norm": 5.224036488826843, + "learning_rate": 3.045386490737189e-06, + "loss": 0.5614, "step": 9005 }, { - "epoch": 0.95, - "grad_norm": 2.7426426194662987, - "learning_rate": 7.157057997331907e-08, - "loss": 0.5838, + "epoch": 0.64, + "grad_norm": 1.7330671546615168, + "learning_rate": 3.0443288220759483e-06, + "loss": 0.5106, "step": 9006 }, { - "epoch": 0.95, - "grad_norm": 3.3577955817656235, - "learning_rate": 7.128354495951006e-08, - "loss": 0.6553, + "epoch": 0.64, + "grad_norm": 1.3608814447332673, + "learning_rate": 3.0432712567198853e-06, + "loss": 0.5254, "step": 9007 }, { - "epoch": 0.95, - "grad_norm": 3.089592673711738, - "learning_rate": 7.099708254973136e-08, - "loss": 0.542, + "epoch": 0.64, + "grad_norm": 1.739900962941544, + "learning_rate": 3.0422137947248664e-06, + "loss": 0.5083, "step": 9008 }, { - "epoch": 0.95, - "grad_norm": 2.6306257005655937, - "learning_rate": 7.071119277726301e-08, - "loss": 0.5938, + "epoch": 0.64, + "grad_norm": 1.6103585829932647, + "learning_rate": 3.041156436146748e-06, + "loss": 0.5576, "step": 9009 }, { - "epoch": 0.95, - "grad_norm": 4.773569073948341, - "learning_rate": 7.0425875675319e-08, - "loss": 0.6349, + "epoch": 0.64, + "grad_norm": 1.9489929267542763, + "learning_rate": 3.0400991810413854e-06, + "loss": 0.5016, "step": 9010 }, { - "epoch": 0.95, - "grad_norm": 2.1860135695945595, - "learning_rate": 7.014113127704725e-08, - "loss": 0.5982, + "epoch": 0.64, + "grad_norm": 1.9113859502231836, + "learning_rate": 3.039042029464623e-06, + "loss": 0.6077, "step": 9011 }, { - "epoch": 0.95, - "grad_norm": 3.076079644508135, - "learning_rate": 6.985695961552796e-08, - "loss": 0.6265, + "epoch": 0.64, + "grad_norm": 1.9074311765185656, + "learning_rate": 3.037984981472306e-06, + "loss": 0.6075, "step": 9012 }, { - "epoch": 0.95, - "grad_norm": 0.9415297800104788, - "learning_rate": 6.957336072377586e-08, - "loss": 0.5714, + "epoch": 0.64, + "grad_norm": 1.9576645624315303, + "learning_rate": 3.0369280371202703e-06, + "loss": 0.5731, "step": 9013 }, { - "epoch": 0.95, - "grad_norm": 2.7068897773748826, - "learning_rate": 6.929033463473789e-08, - "loss": 0.5468, + "epoch": 0.64, + "grad_norm": 1.7547931859123291, + "learning_rate": 3.035871196464349e-06, + "loss": 0.511, "step": 9014 }, { - "epoch": 0.95, - "grad_norm": 2.2176189144911542, - "learning_rate": 6.900788138129554e-08, - "loss": 0.6505, + "epoch": 0.64, + "grad_norm": 1.8598804493695826, + "learning_rate": 3.0348144595603633e-06, + "loss": 0.4743, "step": 9015 }, { - "epoch": 0.95, - "grad_norm": 2.3975049310120897, - "learning_rate": 6.872600099626369e-08, - "loss": 0.5567, + "epoch": 0.64, + "grad_norm": 2.076158254351888, + "learning_rate": 3.03375782646414e-06, + "loss": 0.5624, "step": 9016 }, { - "epoch": 0.95, - "grad_norm": 0.9140350212678829, - "learning_rate": 6.844469351239003e-08, - "loss": 0.5619, + "epoch": 0.64, + "grad_norm": 2.226056441228961, + "learning_rate": 3.032701297231491e-06, + "loss": 0.5487, "step": 9017 }, { - "epoch": 0.95, - "grad_norm": 2.5388047256734287, - "learning_rate": 6.816395896235617e-08, - "loss": 0.6404, + "epoch": 0.64, + "grad_norm": 11.457833471430083, + "learning_rate": 3.0316448719182224e-06, + "loss": 0.4868, "step": 9018 }, { - "epoch": 0.95, - "grad_norm": 2.61129763755357, - "learning_rate": 6.78837973787766e-08, - "loss": 0.6912, + "epoch": 0.64, + "grad_norm": 0.6385500546649843, + "learning_rate": 3.0305885505801435e-06, + "loss": 0.4492, "step": 9019 }, { - "epoch": 0.95, - "grad_norm": 2.7678872176224463, - "learning_rate": 6.760420879420082e-08, - "loss": 0.5791, + "epoch": 0.64, + "grad_norm": 1.868736026433916, + "learning_rate": 3.0295323332730515e-06, + "loss": 0.4521, "step": 9020 }, { - "epoch": 0.95, - "grad_norm": 5.085235281927168, - "learning_rate": 6.732519324111009e-08, - "loss": 0.6828, + "epoch": 0.64, + "grad_norm": 1.8023401071464142, + "learning_rate": 3.0284762200527367e-06, + "loss": 0.5003, "step": 9021 }, { - "epoch": 0.95, - "grad_norm": 2.045301446382808, - "learning_rate": 6.704675075191902e-08, - "loss": 0.6762, + "epoch": 0.64, + "grad_norm": 1.567993440844698, + "learning_rate": 3.0274202109749877e-06, + "loss": 0.5843, "step": 9022 }, { - "epoch": 0.95, - "grad_norm": 3.2248231907993987, - "learning_rate": 6.676888135897674e-08, - "loss": 0.649, + "epoch": 0.64, + "grad_norm": 2.806802473816881, + "learning_rate": 3.026364306095589e-06, + "loss": 0.566, "step": 9023 }, { - "epoch": 0.95, - "grad_norm": 2.704494653374823, - "learning_rate": 6.649158509456576e-08, - "loss": 0.5956, + "epoch": 0.64, + "grad_norm": 2.00575768960431, + "learning_rate": 3.025308505470316e-06, + "loss": 0.5335, "step": 9024 }, { - "epoch": 0.95, - "grad_norm": 0.9722338123614437, - "learning_rate": 6.621486199090088e-08, - "loss": 0.5335, + "epoch": 0.64, + "grad_norm": 1.7151981093860453, + "learning_rate": 3.0242528091549382e-06, + "loss": 0.5807, "step": 9025 }, { - "epoch": 0.95, - "grad_norm": 2.591910857960895, - "learning_rate": 6.593871208013136e-08, - "loss": 0.6333, + "epoch": 0.64, + "grad_norm": 2.089195224507916, + "learning_rate": 3.0231972172052197e-06, + "loss": 0.5204, "step": 9026 }, { - "epoch": 0.95, - "grad_norm": 2.8111496930068176, - "learning_rate": 6.566313539433877e-08, - "loss": 0.6471, + "epoch": 0.64, + "grad_norm": 1.8700469760304583, + "learning_rate": 3.022141729676925e-06, + "loss": 0.5099, "step": 9027 }, { - "epoch": 0.95, - "grad_norm": 2.432987898988874, - "learning_rate": 6.538813196553973e-08, - "loss": 0.5657, + "epoch": 0.64, + "grad_norm": 1.8665526656034335, + "learning_rate": 3.0210863466258044e-06, + "loss": 0.5261, "step": 9028 }, { - "epoch": 0.95, - "grad_norm": 2.439770278419752, - "learning_rate": 6.511370182568311e-08, - "loss": 0.6174, + "epoch": 0.64, + "grad_norm": 3.716833263355458, + "learning_rate": 3.02003106810761e-06, + "loss": 0.4974, "step": 9029 }, { - "epoch": 0.95, - "grad_norm": 2.3437842495883623, - "learning_rate": 6.483984500665119e-08, - "loss": 0.5372, + "epoch": 0.64, + "grad_norm": 1.7682277602689471, + "learning_rate": 3.0189758941780817e-06, + "loss": 0.5134, "step": 9030 }, { - "epoch": 0.95, - "grad_norm": 2.638108993601756, - "learning_rate": 6.456656154025964e-08, - "loss": 0.6398, + "epoch": 0.64, + "grad_norm": 1.6311762848280693, + "learning_rate": 3.017920824892961e-06, + "loss": 0.5123, "step": 9031 }, { - "epoch": 0.95, - "grad_norm": 2.5417156476529437, - "learning_rate": 6.429385145825861e-08, - "loss": 0.5727, + "epoch": 0.64, + "grad_norm": 1.7266939899941267, + "learning_rate": 3.0168658603079783e-06, + "loss": 0.5335, "step": 9032 }, { - "epoch": 0.95, - "grad_norm": 2.6367489893136846, - "learning_rate": 6.402171479233e-08, - "loss": 0.6402, + "epoch": 0.64, + "grad_norm": 1.7577503912681252, + "learning_rate": 3.0158110004788587e-06, + "loss": 0.5517, "step": 9033 }, { - "epoch": 0.95, - "grad_norm": 2.001287522657275, - "learning_rate": 6.375015157409015e-08, - "loss": 0.6013, + "epoch": 0.64, + "grad_norm": 1.6407520330276324, + "learning_rate": 3.014756245461328e-06, + "loss": 0.4596, "step": 9034 }, { - "epoch": 0.95, - "grad_norm": 2.4098090763309523, - "learning_rate": 6.347916183508828e-08, - "loss": 0.5185, + "epoch": 0.64, + "grad_norm": 1.4538438690941864, + "learning_rate": 3.0137015953110983e-06, + "loss": 0.4635, "step": 9035 }, { - "epoch": 0.95, - "grad_norm": 2.6963976959673808, - "learning_rate": 6.320874560680757e-08, - "loss": 0.7315, + "epoch": 0.64, + "grad_norm": 1.8951192232699503, + "learning_rate": 3.012647050083881e-06, + "loss": 0.5148, "step": 9036 }, { - "epoch": 0.95, - "grad_norm": 0.8986435359967851, - "learning_rate": 6.293890292066395e-08, - "loss": 0.5098, + "epoch": 0.64, + "grad_norm": 1.8117284127773448, + "learning_rate": 3.0115926098353793e-06, + "loss": 0.5801, "step": 9037 }, { - "epoch": 0.95, - "grad_norm": 3.122976565906962, - "learning_rate": 6.266963380800684e-08, - "loss": 0.6318, + "epoch": 0.64, + "grad_norm": 1.4575865000496393, + "learning_rate": 3.0105382746212952e-06, + "loss": 0.4873, "step": 9038 }, { - "epoch": 0.95, - "grad_norm": 2.4548075643836396, - "learning_rate": 6.24009383001195e-08, - "loss": 0.6208, + "epoch": 0.64, + "grad_norm": 0.7553313428722288, + "learning_rate": 3.0094840444973204e-06, + "loss": 0.4104, "step": 9039 }, { - "epoch": 0.95, - "grad_norm": 3.0915406479900294, - "learning_rate": 6.213281642821811e-08, - "loss": 0.562, + "epoch": 0.64, + "grad_norm": 1.8000695540517964, + "learning_rate": 3.008429919519144e-06, + "loss": 0.5552, "step": 9040 }, { - "epoch": 0.95, - "grad_norm": 2.875966474710966, - "learning_rate": 6.186526822345163e-08, - "loss": 0.6213, + "epoch": 0.64, + "grad_norm": 1.895191584639015, + "learning_rate": 3.0073758997424453e-06, + "loss": 0.5635, "step": 9041 }, { - "epoch": 0.95, - "grad_norm": 3.426133515510632, - "learning_rate": 6.159829371690407e-08, - "loss": 0.5456, + "epoch": 0.64, + "grad_norm": 1.8818604761723605, + "learning_rate": 3.006321985222905e-06, + "loss": 0.6052, "step": 9042 }, { - "epoch": 0.95, - "grad_norm": 2.21552519795364, - "learning_rate": 6.133189293959175e-08, - "loss": 0.5443, + "epoch": 0.64, + "grad_norm": 1.8496861388750736, + "learning_rate": 3.0052681760161927e-06, + "loss": 0.5825, "step": 9043 }, { - "epoch": 0.95, - "grad_norm": 3.182803894427842, - "learning_rate": 6.106606592246267e-08, - "loss": 0.7024, + "epoch": 0.64, + "grad_norm": 1.5870882257801573, + "learning_rate": 3.0042144721779755e-06, + "loss": 0.5418, "step": 9044 }, { - "epoch": 0.95, - "grad_norm": 2.838797575657555, - "learning_rate": 6.08008126964016e-08, - "loss": 0.5106, + "epoch": 0.64, + "grad_norm": 2.0263551709210055, + "learning_rate": 3.0031608737639095e-06, + "loss": 0.5948, "step": 9045 }, { - "epoch": 0.95, - "grad_norm": 2.464352882434948, - "learning_rate": 6.053613329222441e-08, - "loss": 0.6797, + "epoch": 0.64, + "grad_norm": 2.039966107566047, + "learning_rate": 3.0021073808296553e-06, + "loss": 0.5343, "step": 9046 }, { - "epoch": 0.95, - "grad_norm": 2.5403063737877813, - "learning_rate": 6.027202774068042e-08, - "loss": 0.6741, + "epoch": 0.64, + "grad_norm": 1.7071790207949384, + "learning_rate": 3.001053993430858e-06, + "loss": 0.5026, "step": 9047 }, { - "epoch": 0.95, - "grad_norm": 2.275405839493515, - "learning_rate": 6.00084960724534e-08, - "loss": 0.5259, + "epoch": 0.64, + "grad_norm": 1.7980572366895888, + "learning_rate": 3.000000711623163e-06, + "loss": 0.5443, "step": 9048 }, { - "epoch": 0.95, - "grad_norm": 2.4141326847728526, - "learning_rate": 5.974553831815888e-08, - "loss": 0.6703, + "epoch": 0.64, + "grad_norm": 1.6923195763210657, + "learning_rate": 2.9989475354622055e-06, + "loss": 0.4672, "step": 9049 }, { - "epoch": 0.95, - "grad_norm": 2.7803880388763815, - "learning_rate": 5.9483154508347406e-08, - "loss": 0.6808, + "epoch": 0.64, + "grad_norm": 1.5492367449603235, + "learning_rate": 2.99789446500362e-06, + "loss": 0.5636, "step": 9050 }, { - "epoch": 0.95, - "grad_norm": 2.32854036522692, - "learning_rate": 5.9221344673500714e-08, - "loss": 0.6318, + "epoch": 0.64, + "grad_norm": 2.0782130955323086, + "learning_rate": 2.9968415003030337e-06, + "loss": 0.4544, "step": 9051 }, { - "epoch": 0.95, - "grad_norm": 2.7007162566736373, - "learning_rate": 5.896010884403669e-08, - "loss": 0.6192, + "epoch": 0.64, + "grad_norm": 1.6544206657704241, + "learning_rate": 2.995788641416066e-06, + "loss": 0.5569, "step": 9052 }, { - "epoch": 0.95, - "grad_norm": 2.6424830019293544, - "learning_rate": 5.8699447050303284e-08, - "loss": 0.6002, + "epoch": 0.64, + "grad_norm": 1.629938069184111, + "learning_rate": 2.9947358883983347e-06, + "loss": 0.6035, "step": 9053 }, { - "epoch": 0.95, - "grad_norm": 1.9699800972674935, - "learning_rate": 5.84393593225846e-08, - "loss": 0.5143, + "epoch": 0.64, + "grad_norm": 1.5900035672666304, + "learning_rate": 2.993683241305449e-06, + "loss": 0.4943, "step": 9054 }, { - "epoch": 0.95, - "grad_norm": 2.1870238921593677, - "learning_rate": 5.817984569109702e-08, - "loss": 0.6391, + "epoch": 0.64, + "grad_norm": 1.8533380503704442, + "learning_rate": 2.992630700193013e-06, + "loss": 0.5193, "step": 9055 }, { - "epoch": 0.95, - "grad_norm": 2.3894652143809623, - "learning_rate": 5.792090618598922e-08, - "loss": 0.5925, + "epoch": 0.64, + "grad_norm": 1.558344642984947, + "learning_rate": 2.9915782651166226e-06, + "loss": 0.5678, "step": 9056 }, { - "epoch": 0.95, - "grad_norm": 2.0806139077721775, - "learning_rate": 5.766254083734435e-08, - "loss": 0.6226, + "epoch": 0.64, + "grad_norm": 1.7541849406246062, + "learning_rate": 2.9905259361318763e-06, + "loss": 0.527, "step": 9057 }, { - "epoch": 0.95, - "grad_norm": 2.0742443711293004, - "learning_rate": 5.740474967517839e-08, - "loss": 0.7206, + "epoch": 0.64, + "grad_norm": 3.1358498860846757, + "learning_rate": 2.9894737132943573e-06, + "loss": 0.5396, "step": 9058 }, { - "epoch": 0.95, - "grad_norm": 4.751056686871944, - "learning_rate": 5.714753272944129e-08, - "loss": 0.63, + "epoch": 0.64, + "grad_norm": 1.8263316312287419, + "learning_rate": 2.9884215966596507e-06, + "loss": 0.4962, "step": 9059 }, { - "epoch": 0.95, - "grad_norm": 2.9924140950273572, - "learning_rate": 5.68908900300158e-08, - "loss": 0.5352, + "epoch": 0.64, + "grad_norm": 1.8879636877729697, + "learning_rate": 2.9873695862833295e-06, + "loss": 0.5866, "step": 9060 }, { - "epoch": 0.95, - "grad_norm": 2.401100392375077, - "learning_rate": 5.6634821606717514e-08, - "loss": 0.6337, + "epoch": 0.64, + "grad_norm": 1.847644580086368, + "learning_rate": 2.9863176822209694e-06, + "loss": 0.4814, "step": 9061 }, { - "epoch": 0.95, - "grad_norm": 3.3084059799813343, - "learning_rate": 5.6379327489295424e-08, - "loss": 0.5915, + "epoch": 0.64, + "grad_norm": 1.8482484330841524, + "learning_rate": 2.9852658845281313e-06, + "loss": 0.5301, "step": 9062 }, { - "epoch": 0.95, - "grad_norm": 2.4362237430967926, - "learning_rate": 5.6124407707432436e-08, - "loss": 0.6301, + "epoch": 0.64, + "grad_norm": 1.9578317251517725, + "learning_rate": 2.9842141932603757e-06, + "loss": 0.5788, "step": 9063 }, { - "epoch": 0.95, - "grad_norm": 2.974258772040746, - "learning_rate": 5.5870062290744876e-08, - "loss": 0.6755, + "epoch": 0.64, + "grad_norm": 1.569155942427024, + "learning_rate": 2.9831626084732568e-06, + "loss": 0.5508, "step": 9064 }, { - "epoch": 0.95, - "grad_norm": 2.330894208256594, - "learning_rate": 5.5616291268781875e-08, - "loss": 0.6224, + "epoch": 0.64, + "grad_norm": 1.8271862358497726, + "learning_rate": 2.9821111302223226e-06, + "loss": 0.5029, "step": 9065 }, { - "epoch": 0.95, - "grad_norm": 2.1719011136515, - "learning_rate": 5.53630946710243e-08, - "loss": 0.6096, + "epoch": 0.64, + "grad_norm": 1.5600804808284479, + "learning_rate": 2.9810597585631173e-06, + "loss": 0.5151, "step": 9066 }, { - "epoch": 0.95, - "grad_norm": 2.645578213445915, - "learning_rate": 5.5110472526889725e-08, - "loss": 0.5871, + "epoch": 0.64, + "grad_norm": 0.6756596923441516, + "learning_rate": 2.9800084935511746e-06, + "loss": 0.4326, "step": 9067 }, { - "epoch": 0.95, - "grad_norm": 2.176097941547101, - "learning_rate": 5.485842486572579e-08, - "loss": 0.6087, + "epoch": 0.64, + "grad_norm": 2.5780330666130133, + "learning_rate": 2.9789573352420296e-06, + "loss": 0.5105, "step": 9068 }, { - "epoch": 0.95, - "grad_norm": 2.84982524682506, - "learning_rate": 5.4606951716815735e-08, - "loss": 0.6064, + "epoch": 0.64, + "grad_norm": 0.8320275511792663, + "learning_rate": 2.9779062836912076e-06, + "loss": 0.4016, "step": 9069 }, { - "epoch": 0.95, - "grad_norm": 2.4460753590123403, - "learning_rate": 5.435605310937342e-08, - "loss": 0.6023, + "epoch": 0.64, + "grad_norm": 2.0167229087392036, + "learning_rate": 2.9768553389542265e-06, + "loss": 0.5684, "step": 9070 }, { - "epoch": 0.95, - "grad_norm": 3.2859655207695355, - "learning_rate": 5.410572907254885e-08, - "loss": 0.6332, + "epoch": 0.64, + "grad_norm": 1.541893262516551, + "learning_rate": 2.9758045010866e-06, + "loss": 0.483, "step": 9071 }, { - "epoch": 0.95, - "grad_norm": 2.7744254402541415, - "learning_rate": 5.3855979635423774e-08, - "loss": 0.6184, + "epoch": 0.64, + "grad_norm": 1.7421225648676588, + "learning_rate": 2.97475377014384e-06, + "loss": 0.5071, "step": 9072 }, { - "epoch": 0.95, - "grad_norm": 2.2908907666964726, - "learning_rate": 5.360680482701275e-08, - "loss": 0.6022, + "epoch": 0.64, + "grad_norm": 1.8991158542986202, + "learning_rate": 2.973703146181448e-06, + "loss": 0.4399, "step": 9073 }, { - "epoch": 0.95, - "grad_norm": 2.659397892050746, - "learning_rate": 5.3358204676264844e-08, - "loss": 0.5827, + "epoch": 0.64, + "grad_norm": 2.144857953910054, + "learning_rate": 2.972652629254923e-06, + "loss": 0.5633, "step": 9074 }, { - "epoch": 0.95, - "grad_norm": 2.568905156288951, - "learning_rate": 5.3110179212061406e-08, - "loss": 0.6192, + "epoch": 0.64, + "grad_norm": 1.7474322907095547, + "learning_rate": 2.971602219419753e-06, + "loss": 0.5155, "step": 9075 }, { - "epoch": 0.96, - "grad_norm": 2.292649322204462, - "learning_rate": 5.286272846321716e-08, - "loss": 0.5548, + "epoch": 0.64, + "grad_norm": 1.7751174624702097, + "learning_rate": 2.9705519167314295e-06, + "loss": 0.5042, "step": 9076 }, { - "epoch": 0.96, - "grad_norm": 3.0132313937949267, - "learning_rate": 5.2615852458480775e-08, - "loss": 0.6589, + "epoch": 0.64, + "grad_norm": 2.5255934537175073, + "learning_rate": 2.9695017212454302e-06, + "loss": 0.5445, "step": 9077 }, { - "epoch": 0.96, - "grad_norm": 4.892679189485966, - "learning_rate": 5.23695512265332e-08, - "loss": 0.5454, + "epoch": 0.64, + "grad_norm": 2.0735033758027996, + "learning_rate": 2.9684516330172297e-06, + "loss": 0.5243, "step": 9078 }, { - "epoch": 0.96, - "grad_norm": 2.2508615371235727, - "learning_rate": 5.2123824795988764e-08, - "loss": 0.6351, + "epoch": 0.64, + "grad_norm": 0.690645719944104, + "learning_rate": 2.9674016521022977e-06, + "loss": 0.4294, "step": 9079 }, { - "epoch": 0.96, - "grad_norm": 4.9551130391975455, - "learning_rate": 5.187867319539519e-08, - "loss": 0.6406, + "epoch": 0.64, + "grad_norm": 1.649333469110579, + "learning_rate": 2.966351778556097e-06, + "loss": 0.5599, "step": 9080 }, { - "epoch": 0.96, - "grad_norm": 10.096299527370705, - "learning_rate": 5.163409645323414e-08, - "loss": 0.6185, + "epoch": 0.64, + "grad_norm": 2.3392982829962583, + "learning_rate": 2.965302012434088e-06, + "loss": 0.5871, "step": 9081 }, { - "epoch": 0.96, - "grad_norm": 3.0533996853064957, - "learning_rate": 5.139009459791955e-08, - "loss": 0.5818, + "epoch": 0.64, + "grad_norm": 1.6322904357154613, + "learning_rate": 2.964252353791719e-06, + "loss": 0.5243, "step": 9082 }, { - "epoch": 0.96, - "grad_norm": 2.9947071352082357, - "learning_rate": 5.1146667657798744e-08, - "loss": 0.5878, + "epoch": 0.64, + "grad_norm": 1.7649422780164459, + "learning_rate": 2.963202802684441e-06, + "loss": 0.5239, "step": 9083 }, { - "epoch": 0.96, - "grad_norm": 2.6515995229170928, - "learning_rate": 5.0903815661152435e-08, - "loss": 0.6047, + "epoch": 0.64, + "grad_norm": 1.5702511444579155, + "learning_rate": 2.9621533591676923e-06, + "loss": 0.4729, "step": 9084 }, { - "epoch": 0.96, - "grad_norm": 2.216360216245191, - "learning_rate": 5.0661538636194164e-08, - "loss": 0.6247, + "epoch": 0.64, + "grad_norm": 1.7699795599402075, + "learning_rate": 2.961104023296908e-06, + "loss": 0.5228, "step": 9085 }, { - "epoch": 0.96, - "grad_norm": 3.268655449300178, - "learning_rate": 5.041983661107142e-08, - "loss": 0.622, + "epoch": 0.64, + "grad_norm": 2.328969849088684, + "learning_rate": 2.960054795127516e-06, + "loss": 0.5985, "step": 9086 }, { - "epoch": 0.96, - "grad_norm": 21.215101180819993, - "learning_rate": 5.017870961386451e-08, - "loss": 0.568, + "epoch": 0.64, + "grad_norm": 5.435355368118025, + "learning_rate": 2.9590056747149437e-06, + "loss": 0.4739, "step": 9087 }, { - "epoch": 0.96, - "grad_norm": 2.415400161597065, - "learning_rate": 4.9938157672586585e-08, - "loss": 0.664, + "epoch": 0.64, + "grad_norm": 2.04534983820134, + "learning_rate": 2.9579566621146082e-06, + "loss": 0.5339, "step": 9088 }, { - "epoch": 0.96, - "grad_norm": 2.498930091348122, - "learning_rate": 4.9698180815183626e-08, - "loss": 0.6175, + "epoch": 0.64, + "grad_norm": 1.6358878890054194, + "learning_rate": 2.9569077573819205e-06, + "loss": 0.4516, "step": 9089 }, { - "epoch": 0.96, - "grad_norm": 2.2002817993561457, - "learning_rate": 4.945877906953722e-08, - "loss": 0.5586, + "epoch": 0.65, + "grad_norm": 1.559303470986315, + "learning_rate": 2.955858960572287e-06, + "loss": 0.5035, "step": 9090 }, { - "epoch": 0.96, - "grad_norm": 2.936573265428427, - "learning_rate": 4.921995246345901e-08, - "loss": 0.5832, + "epoch": 0.65, + "grad_norm": 1.525706501303829, + "learning_rate": 2.954810271741111e-06, + "loss": 0.5241, "step": 9091 }, { - "epoch": 0.96, - "grad_norm": 2.2660732599551445, - "learning_rate": 4.898170102469513e-08, - "loss": 0.6299, + "epoch": 0.65, + "grad_norm": 1.595887534947597, + "learning_rate": 2.953761690943787e-06, + "loss": 0.4874, "step": 9092 }, { - "epoch": 0.96, - "grad_norm": 2.3604212255644623, - "learning_rate": 4.87440247809251e-08, - "loss": 0.6544, + "epoch": 0.65, + "grad_norm": 1.84480894344978, + "learning_rate": 2.9527132182357034e-06, + "loss": 0.6127, "step": 9093 }, { - "epoch": 0.96, - "grad_norm": 2.441174650138323, - "learning_rate": 4.850692375976185e-08, - "loss": 0.6546, + "epoch": 0.65, + "grad_norm": 0.724954841055202, + "learning_rate": 2.951664853672244e-06, + "loss": 0.4192, "step": 9094 }, { - "epoch": 0.96, - "grad_norm": 4.559330779198589, - "learning_rate": 4.827039798875111e-08, - "loss": 0.5868, + "epoch": 0.65, + "grad_norm": 1.5072061096539031, + "learning_rate": 2.9506165973087873e-06, + "loss": 0.5501, "step": 9095 }, { - "epoch": 0.96, - "grad_norm": 2.6264220920489487, - "learning_rate": 4.803444749537145e-08, - "loss": 0.592, + "epoch": 0.65, + "grad_norm": 1.6408571725687078, + "learning_rate": 2.949568449200708e-06, + "loss": 0.5552, "step": 9096 }, { - "epoch": 0.96, - "grad_norm": 2.346201879455138, - "learning_rate": 4.7799072307034845e-08, - "loss": 0.5913, + "epoch": 0.65, + "grad_norm": 1.7545752499715042, + "learning_rate": 2.9485204094033694e-06, + "loss": 0.4826, "step": 9097 }, { - "epoch": 0.96, - "grad_norm": 2.2187162896332735, - "learning_rate": 4.756427245108664e-08, - "loss": 0.5922, + "epoch": 0.65, + "grad_norm": 0.750689301902596, + "learning_rate": 2.9474724779721352e-06, + "loss": 0.4283, "step": 9098 }, { - "epoch": 0.96, - "grad_norm": 2.665266994162754, - "learning_rate": 4.733004795480556e-08, - "loss": 0.5226, + "epoch": 0.65, + "grad_norm": 1.7694538005454794, + "learning_rate": 2.94642465496236e-06, + "loss": 0.603, "step": 9099 }, { - "epoch": 0.96, - "grad_norm": 2.309703067456407, - "learning_rate": 4.709639884540262e-08, - "loss": 0.6754, + "epoch": 0.65, + "grad_norm": 1.4383253830582385, + "learning_rate": 2.945376940429393e-06, + "loss": 0.5595, "step": 9100 }, { - "epoch": 0.96, - "grad_norm": 3.519489921324912, - "learning_rate": 4.686332515002223e-08, - "loss": 0.5979, + "epoch": 0.65, + "grad_norm": 7.653869215502521, + "learning_rate": 2.944329334428576e-06, + "loss": 0.5508, "step": 9101 }, { - "epoch": 0.96, - "grad_norm": 3.4734030677173644, - "learning_rate": 4.663082689574328e-08, - "loss": 0.5856, + "epoch": 0.65, + "grad_norm": 1.431629562731777, + "learning_rate": 2.9432818370152503e-06, + "loss": 0.4798, "step": 9102 }, { - "epoch": 0.96, - "grad_norm": 0.9763445326065605, - "learning_rate": 4.6398904109575815e-08, - "loss": 0.5201, + "epoch": 0.65, + "grad_norm": 1.8359598722880233, + "learning_rate": 2.942234448244748e-06, + "loss": 0.5177, "step": 9103 }, { - "epoch": 0.96, - "grad_norm": 2.2503233196738193, - "learning_rate": 4.616755681846441e-08, - "loss": 0.543, + "epoch": 0.65, + "grad_norm": 1.6642943830379398, + "learning_rate": 2.941187168172395e-06, + "loss": 0.5697, "step": 9104 }, { - "epoch": 0.96, - "grad_norm": 2.2179469381251273, - "learning_rate": 4.593678504928589e-08, - "loss": 0.6158, + "epoch": 0.65, + "grad_norm": 0.681232889719987, + "learning_rate": 2.940139996853509e-06, + "loss": 0.4369, "step": 9105 }, { - "epoch": 0.96, - "grad_norm": 2.623231471830531, - "learning_rate": 4.570658882885104e-08, - "loss": 0.6553, + "epoch": 0.65, + "grad_norm": 1.572636934299552, + "learning_rate": 2.9390929343434117e-06, + "loss": 0.5915, "step": 9106 }, { - "epoch": 0.96, - "grad_norm": 2.50946392413075, - "learning_rate": 4.547696818390346e-08, - "loss": 0.5586, + "epoch": 0.65, + "grad_norm": 1.7111222481926083, + "learning_rate": 2.9380459806974075e-06, + "loss": 0.5807, "step": 9107 }, { - "epoch": 0.96, - "grad_norm": 2.2550240930054586, - "learning_rate": 4.524792314111959e-08, - "loss": 0.6628, + "epoch": 0.65, + "grad_norm": 1.7582873850297525, + "learning_rate": 2.936999135970801e-06, + "loss": 0.5603, "step": 9108 }, { - "epoch": 0.96, - "grad_norm": 2.249364536937027, - "learning_rate": 4.501945372710925e-08, - "loss": 0.5307, + "epoch": 0.65, + "grad_norm": 1.6930541640976577, + "learning_rate": 2.935952400218891e-06, + "loss": 0.5686, "step": 9109 }, { - "epoch": 0.96, - "grad_norm": 2.641533439490308, - "learning_rate": 4.4791559968415664e-08, - "loss": 0.581, + "epoch": 0.65, + "grad_norm": 1.3512936299481089, + "learning_rate": 2.9349057734969688e-06, + "loss": 0.4582, "step": 9110 }, { - "epoch": 0.96, - "grad_norm": 2.400735266374335, - "learning_rate": 4.456424189151376e-08, - "loss": 0.5803, + "epoch": 0.65, + "grad_norm": 1.3979439085817935, + "learning_rate": 2.9338592558603217e-06, + "loss": 0.5344, "step": 9111 }, { - "epoch": 0.96, - "grad_norm": 2.013311376301837, - "learning_rate": 4.433749952281463e-08, - "loss": 0.5211, + "epoch": 0.65, + "grad_norm": 1.7966918616772511, + "learning_rate": 2.9328128473642303e-06, + "loss": 0.5925, "step": 9112 }, { - "epoch": 0.96, - "grad_norm": 2.6401002509833007, - "learning_rate": 4.4111332888658876e-08, - "loss": 0.6004, + "epoch": 0.65, + "grad_norm": 2.018210215146075, + "learning_rate": 2.931766548063967e-06, + "loss": 0.4942, "step": 9113 }, { - "epoch": 0.96, - "grad_norm": 0.9931452731017555, - "learning_rate": 4.388574201532214e-08, - "loss": 0.5143, + "epoch": 0.65, + "grad_norm": 1.433107988509724, + "learning_rate": 2.9307203580148037e-06, + "loss": 0.4576, "step": 9114 }, { - "epoch": 0.96, - "grad_norm": 2.6549315124291772, - "learning_rate": 4.366072692901346e-08, - "loss": 0.6261, + "epoch": 0.65, + "grad_norm": 1.8698281153158276, + "learning_rate": 2.929674277272003e-06, + "loss": 0.5283, "step": 9115 }, { - "epoch": 0.96, - "grad_norm": 3.012204151927376, - "learning_rate": 4.343628765587471e-08, - "loss": 0.6443, + "epoch": 0.65, + "grad_norm": 1.4812923824188158, + "learning_rate": 2.9286283058908215e-06, + "loss": 0.5165, "step": 9116 }, { - "epoch": 0.96, - "grad_norm": 1.0512576287150874, - "learning_rate": 4.321242422197946e-08, - "loss": 0.5194, + "epoch": 0.65, + "grad_norm": 1.798572885710306, + "learning_rate": 2.9275824439265123e-06, + "loss": 0.5586, "step": 9117 }, { - "epoch": 0.96, - "grad_norm": 2.609812624572654, - "learning_rate": 4.298913665333637e-08, - "loss": 0.5724, + "epoch": 0.65, + "grad_norm": 1.663242449610458, + "learning_rate": 2.926536691434321e-06, + "loss": 0.5867, "step": 9118 }, { - "epoch": 0.96, - "grad_norm": 3.067384788809318, - "learning_rate": 4.276642497588579e-08, - "loss": 0.5614, + "epoch": 0.65, + "grad_norm": 0.7797944645711377, + "learning_rate": 2.925491048469488e-06, + "loss": 0.4359, "step": 9119 }, { - "epoch": 0.96, - "grad_norm": 3.4934409570440055, - "learning_rate": 4.2544289215502576e-08, - "loss": 0.5847, + "epoch": 0.65, + "grad_norm": 1.8509222277392656, + "learning_rate": 2.9244455150872448e-06, + "loss": 0.5094, "step": 9120 }, { - "epoch": 0.96, - "grad_norm": 2.470305736923208, - "learning_rate": 4.2322729397992755e-08, - "loss": 0.6099, + "epoch": 0.65, + "grad_norm": 5.298498877267779, + "learning_rate": 2.9234000913428246e-06, + "loss": 0.4629, "step": 9121 }, { - "epoch": 0.96, - "grad_norm": 4.735639182147766, - "learning_rate": 4.210174554909796e-08, - "loss": 0.6468, + "epoch": 0.65, + "grad_norm": 1.889587237086541, + "learning_rate": 2.9223547772914483e-06, + "loss": 0.5122, "step": 9122 }, { - "epoch": 0.96, - "grad_norm": 2.4294443877199807, - "learning_rate": 4.188133769448932e-08, - "loss": 0.5507, + "epoch": 0.65, + "grad_norm": 2.6626645448575164, + "learning_rate": 2.921309572988331e-06, + "loss": 0.5966, "step": 9123 }, { - "epoch": 0.96, - "grad_norm": 3.268281292601878, - "learning_rate": 4.1661505859775245e-08, - "loss": 0.6073, + "epoch": 0.65, + "grad_norm": 0.6936885797313508, + "learning_rate": 2.9202644784886863e-06, + "loss": 0.4191, "step": 9124 }, { - "epoch": 0.96, - "grad_norm": 3.292116636642608, - "learning_rate": 4.1442250070494186e-08, - "loss": 0.625, + "epoch": 0.65, + "grad_norm": 1.7698025310630925, + "learning_rate": 2.9192194938477197e-06, + "loss": 0.561, "step": 9125 }, { - "epoch": 0.96, - "grad_norm": 3.0214437747145553, - "learning_rate": 4.1223570352118545e-08, - "loss": 0.5964, + "epoch": 0.65, + "grad_norm": 1.742444055050693, + "learning_rate": 2.9181746191206296e-06, + "loss": 0.5386, "step": 9126 }, { - "epoch": 0.96, - "grad_norm": 3.237561901276038, - "learning_rate": 4.100546673005412e-08, - "loss": 0.6468, + "epoch": 0.65, + "grad_norm": 1.635750401286632, + "learning_rate": 2.9171298543626094e-06, + "loss": 0.5009, "step": 9127 }, { - "epoch": 0.96, - "grad_norm": 3.1688911358412177, - "learning_rate": 4.078793922963953e-08, - "loss": 0.6302, + "epoch": 0.65, + "grad_norm": 2.184473278677268, + "learning_rate": 2.916085199628849e-06, + "loss": 0.5437, "step": 9128 }, { - "epoch": 0.96, - "grad_norm": 3.447810752311471, - "learning_rate": 4.057098787614677e-08, - "loss": 0.5529, + "epoch": 0.65, + "grad_norm": 0.6767015162089856, + "learning_rate": 2.915040654974529e-06, + "loss": 0.4251, "step": 9129 }, { - "epoch": 0.96, - "grad_norm": 2.636676069560152, - "learning_rate": 4.035461269478014e-08, - "loss": 0.653, + "epoch": 0.65, + "grad_norm": 1.830246567476984, + "learning_rate": 2.913996220454828e-06, + "loss": 0.5229, "step": 9130 }, { - "epoch": 0.96, - "grad_norm": 1.8829981767535657, - "learning_rate": 4.013881371067841e-08, - "loss": 0.617, + "epoch": 0.65, + "grad_norm": 1.9968279851940836, + "learning_rate": 2.9129518961249114e-06, + "loss": 0.5292, "step": 9131 }, { - "epoch": 0.96, - "grad_norm": 3.756084256939002, - "learning_rate": 3.992359094891096e-08, - "loss": 0.5825, + "epoch": 0.65, + "grad_norm": 1.7084140894745679, + "learning_rate": 2.9119076820399517e-06, + "loss": 0.5249, "step": 9132 }, { - "epoch": 0.96, - "grad_norm": 2.2300017538671466, - "learning_rate": 3.970894443448281e-08, - "loss": 0.5501, + "epoch": 0.65, + "grad_norm": 1.511815267251824, + "learning_rate": 2.910863578255102e-06, + "loss": 0.5401, "step": 9133 }, { - "epoch": 0.96, - "grad_norm": 3.0651156169351963, - "learning_rate": 3.949487419233122e-08, - "loss": 0.5455, + "epoch": 0.65, + "grad_norm": 4.356168419984041, + "learning_rate": 2.909819584825516e-06, + "loss": 0.5326, "step": 9134 }, { - "epoch": 0.96, - "grad_norm": 2.4150439079741504, - "learning_rate": 3.92813802473252e-08, - "loss": 0.6961, + "epoch": 0.65, + "grad_norm": 2.0771701803836335, + "learning_rate": 2.9087757018063434e-06, + "loss": 0.5129, "step": 9135 }, { - "epoch": 0.96, - "grad_norm": 3.197514010373594, - "learning_rate": 3.906846262426878e-08, - "loss": 0.6243, + "epoch": 0.65, + "grad_norm": 2.183058695978997, + "learning_rate": 2.9077319292527235e-06, + "loss": 0.5835, "step": 9136 }, { - "epoch": 0.96, - "grad_norm": 2.122714920113005, - "learning_rate": 3.885612134789718e-08, - "loss": 0.6401, + "epoch": 0.65, + "grad_norm": 2.245600146769842, + "learning_rate": 2.9066882672197944e-06, + "loss": 0.5677, "step": 9137 }, { - "epoch": 0.96, - "grad_norm": 0.9331466918924591, - "learning_rate": 3.864435644288123e-08, - "loss": 0.5596, + "epoch": 0.65, + "grad_norm": 1.5172333535787021, + "learning_rate": 2.9056447157626823e-06, + "loss": 0.4495, "step": 9138 }, { - "epoch": 0.96, - "grad_norm": 3.5512512202448434, - "learning_rate": 3.8433167933821234e-08, - "loss": 0.6279, + "epoch": 0.65, + "grad_norm": 1.8752268297469012, + "learning_rate": 2.9046012749365134e-06, + "loss": 0.606, "step": 9139 }, { - "epoch": 0.96, - "grad_norm": 2.3384806493446684, - "learning_rate": 3.822255584525369e-08, - "loss": 0.6029, + "epoch": 0.65, + "grad_norm": 1.8421887234326304, + "learning_rate": 2.9035579447964045e-06, + "loss": 0.514, "step": 9140 }, { - "epoch": 0.96, - "grad_norm": 2.7384044773724057, - "learning_rate": 3.8012520201646255e-08, - "loss": 0.5762, + "epoch": 0.65, + "grad_norm": 2.5955653759184454, + "learning_rate": 2.9025147253974695e-06, + "loss": 0.4884, "step": 9141 }, { - "epoch": 0.96, - "grad_norm": 2.537480825998291, - "learning_rate": 3.780306102740105e-08, - "loss": 0.6246, + "epoch": 0.65, + "grad_norm": 1.8218979729146112, + "learning_rate": 2.9014716167948155e-06, + "loss": 0.522, "step": 9142 }, { - "epoch": 0.96, - "grad_norm": 2.535534515356477, - "learning_rate": 3.7594178346851974e-08, - "loss": 0.6051, + "epoch": 0.65, + "grad_norm": 2.008799245319336, + "learning_rate": 2.9004286190435383e-06, + "loss": 0.5012, "step": 9143 }, { - "epoch": 0.96, - "grad_norm": 2.6217629251128436, - "learning_rate": 3.738587218426626e-08, - "loss": 0.5192, + "epoch": 0.65, + "grad_norm": 1.5598532865449968, + "learning_rate": 2.8993857321987385e-06, + "loss": 0.4545, "step": 9144 }, { - "epoch": 0.96, - "grad_norm": 3.930611934484491, - "learning_rate": 3.7178142563844e-08, - "loss": 0.554, + "epoch": 0.65, + "grad_norm": 2.1187891226444515, + "learning_rate": 2.898342956315501e-06, + "loss": 0.528, "step": 9145 }, { - "epoch": 0.96, - "grad_norm": 2.4744564645686906, - "learning_rate": 3.697098950971922e-08, - "loss": 0.6043, + "epoch": 0.65, + "grad_norm": 1.920936293937471, + "learning_rate": 2.8973002914489098e-06, + "loss": 0.5367, "step": 9146 }, { - "epoch": 0.96, - "grad_norm": 2.800360974943865, - "learning_rate": 3.676441304595879e-08, - "loss": 0.5262, + "epoch": 0.65, + "grad_norm": 1.7215885793665529, + "learning_rate": 2.896257737654042e-06, + "loss": 0.5563, "step": 9147 }, { - "epoch": 0.96, - "grad_norm": 2.2752239967517283, - "learning_rate": 3.655841319656128e-08, - "loss": 0.641, + "epoch": 0.65, + "grad_norm": 1.9995339251368327, + "learning_rate": 2.8952152949859714e-06, + "loss": 0.5626, "step": 9148 }, { - "epoch": 0.96, - "grad_norm": 2.9374913960050826, - "learning_rate": 3.635298998545922e-08, - "loss": 0.62, + "epoch": 0.65, + "grad_norm": 1.6887871743280758, + "learning_rate": 2.8941729634997584e-06, + "loss": 0.5606, "step": 9149 }, { - "epoch": 0.96, - "grad_norm": 2.780995594486591, - "learning_rate": 3.614814343651851e-08, - "loss": 0.6157, + "epoch": 0.65, + "grad_norm": 1.688499528366517, + "learning_rate": 2.893130743250465e-06, + "loss": 0.5573, "step": 9150 }, { - "epoch": 0.96, - "grad_norm": 2.194471742530128, - "learning_rate": 3.5943873573537903e-08, - "loss": 0.5811, + "epoch": 0.65, + "grad_norm": 1.8053165038463148, + "learning_rate": 2.8920886342931444e-06, + "loss": 0.4619, "step": 9151 }, { - "epoch": 0.96, - "grad_norm": 2.3614260129521387, - "learning_rate": 3.574018042024785e-08, - "loss": 0.5898, + "epoch": 0.65, + "grad_norm": 2.3806881003346683, + "learning_rate": 2.891046636682845e-06, + "loss": 0.5589, "step": 9152 }, { - "epoch": 0.96, - "grad_norm": 3.281118604888936, - "learning_rate": 3.553706400031331e-08, - "loss": 0.624, + "epoch": 0.65, + "grad_norm": 1.902122019322406, + "learning_rate": 2.890004750474611e-06, + "loss": 0.4429, "step": 9153 }, { - "epoch": 0.96, - "grad_norm": 3.4511839932038293, - "learning_rate": 3.533452433733209e-08, - "loss": 0.6539, + "epoch": 0.65, + "grad_norm": 1.898635296583468, + "learning_rate": 2.888962975723471e-06, + "loss": 0.5956, "step": 9154 }, { - "epoch": 0.96, - "grad_norm": 3.0189891199871286, - "learning_rate": 3.513256145483479e-08, - "loss": 0.5865, + "epoch": 0.65, + "grad_norm": 1.7721280205285528, + "learning_rate": 2.887921312484464e-06, + "loss": 0.5188, "step": 9155 }, { - "epoch": 0.96, - "grad_norm": 5.371611401435088, - "learning_rate": 3.493117537628432e-08, - "loss": 0.6437, + "epoch": 0.65, + "grad_norm": 1.6007764780313938, + "learning_rate": 2.886879760812608e-06, + "loss": 0.4761, "step": 9156 }, { - "epoch": 0.96, - "grad_norm": 0.8768550617990599, - "learning_rate": 3.4730366125076966e-08, - "loss": 0.5259, + "epoch": 0.65, + "grad_norm": 0.7995164646770508, + "learning_rate": 2.885838320762924e-06, + "loss": 0.4519, "step": 9157 }, { - "epoch": 0.96, - "grad_norm": 4.802661184621188, - "learning_rate": 3.453013372454295e-08, - "loss": 0.5858, + "epoch": 0.65, + "grad_norm": 1.901669090604936, + "learning_rate": 2.8847969923904225e-06, + "loss": 0.5013, "step": 9158 }, { - "epoch": 0.96, - "grad_norm": 2.4916523907020736, - "learning_rate": 3.433047819794366e-08, - "loss": 0.6361, + "epoch": 0.65, + "grad_norm": 1.9099657929034635, + "learning_rate": 2.883755775750111e-06, + "loss": 0.5669, "step": 9159 }, { - "epoch": 0.96, - "grad_norm": 2.3170062905496733, - "learning_rate": 3.413139956847611e-08, - "loss": 0.6925, + "epoch": 0.65, + "grad_norm": 1.9110806431936591, + "learning_rate": 2.8827146708969935e-06, + "loss": 0.5458, "step": 9160 }, { - "epoch": 0.96, - "grad_norm": 3.102122288265513, - "learning_rate": 3.3932897859267346e-08, - "loss": 0.6089, + "epoch": 0.65, + "grad_norm": 1.9182891986418684, + "learning_rate": 2.8816736778860566e-06, + "loss": 0.4935, "step": 9161 }, { - "epoch": 0.96, - "grad_norm": 2.212048736855744, - "learning_rate": 3.3734973093378367e-08, - "loss": 0.678, + "epoch": 0.65, + "grad_norm": 1.5203921105583955, + "learning_rate": 2.880632796772298e-06, + "loss": 0.5041, "step": 9162 }, { - "epoch": 0.96, - "grad_norm": 3.5526846645578467, - "learning_rate": 3.353762529380466e-08, - "loss": 0.6322, + "epoch": 0.65, + "grad_norm": 1.599742054897293, + "learning_rate": 2.879592027610695e-06, + "loss": 0.5305, "step": 9163 }, { - "epoch": 0.96, - "grad_norm": 2.6953009571329827, - "learning_rate": 3.334085448347346e-08, - "loss": 0.6071, + "epoch": 0.65, + "grad_norm": 1.9902824940569694, + "learning_rate": 2.878551370456225e-06, + "loss": 0.5162, "step": 9164 }, { - "epoch": 0.96, - "grad_norm": 2.09081032023049, - "learning_rate": 3.314466068524425e-08, - "loss": 0.5649, + "epoch": 0.65, + "grad_norm": 1.5565396103595257, + "learning_rate": 2.8775108253638605e-06, + "loss": 0.5403, "step": 9165 }, { - "epoch": 0.96, - "grad_norm": 2.8019153487339916, - "learning_rate": 3.294904392191045e-08, - "loss": 0.6405, + "epoch": 0.65, + "grad_norm": 2.0790674560027878, + "learning_rate": 2.8764703923885663e-06, + "loss": 0.5624, "step": 9166 }, { - "epoch": 0.96, - "grad_norm": 2.2938098097617368, - "learning_rate": 3.27540042161989e-08, - "loss": 0.6646, + "epoch": 0.65, + "grad_norm": 1.8666482826241881, + "learning_rate": 2.8754300715853035e-06, + "loss": 0.5956, "step": 9167 }, { - "epoch": 0.96, - "grad_norm": 2.475187200092886, - "learning_rate": 3.255954159076813e-08, - "loss": 0.5879, + "epoch": 0.65, + "grad_norm": 1.6450497955015766, + "learning_rate": 2.874389863009022e-06, + "loss": 0.5377, "step": 9168 }, { - "epoch": 0.96, - "grad_norm": 3.1691062969062194, - "learning_rate": 3.236565606821007e-08, - "loss": 0.5371, + "epoch": 0.65, + "grad_norm": 3.649811266278659, + "learning_rate": 2.873349766714669e-06, + "loss": 0.5647, "step": 9169 }, { - "epoch": 0.96, - "grad_norm": 2.4161686022837277, - "learning_rate": 3.2172347671050596e-08, - "loss": 0.6002, + "epoch": 0.65, + "grad_norm": 1.7870294154573634, + "learning_rate": 2.8723097827571887e-06, + "loss": 0.5458, "step": 9170 }, { - "epoch": 0.97, - "grad_norm": 2.853074919404103, - "learning_rate": 3.197961642174674e-08, - "loss": 0.5644, + "epoch": 0.65, + "grad_norm": 1.6636373270478908, + "learning_rate": 2.8712699111915143e-06, + "loss": 0.5217, "step": 9171 }, { - "epoch": 0.97, - "grad_norm": 4.210820383910226, - "learning_rate": 3.1787462342690036e-08, - "loss": 0.6206, + "epoch": 0.65, + "grad_norm": 0.7341497383219123, + "learning_rate": 2.870230152072579e-06, + "loss": 0.4261, "step": 9172 }, { - "epoch": 0.97, - "grad_norm": 2.628828526440614, - "learning_rate": 3.1595885456204845e-08, - "loss": 0.6029, + "epoch": 0.65, + "grad_norm": 4.0114796908380335, + "learning_rate": 2.8691905054552994e-06, + "loss": 0.4799, "step": 9173 }, { - "epoch": 0.97, - "grad_norm": 2.5487802779308995, - "learning_rate": 3.1404885784547256e-08, - "loss": 0.645, + "epoch": 0.65, + "grad_norm": 1.810177702667246, + "learning_rate": 2.8681509713946014e-06, + "loss": 0.5328, "step": 9174 }, { - "epoch": 0.97, - "grad_norm": 0.9690656931951284, - "learning_rate": 3.1214463349907295e-08, - "loss": 0.5252, + "epoch": 0.65, + "grad_norm": 1.6226085653214999, + "learning_rate": 2.867111549945391e-06, + "loss": 0.4702, "step": 9175 }, { - "epoch": 0.97, - "grad_norm": 1.965446772712665, - "learning_rate": 3.102461817440727e-08, - "loss": 0.6123, + "epoch": 0.65, + "grad_norm": 2.4067844495067865, + "learning_rate": 2.8660722411625778e-06, + "loss": 0.6179, "step": 9176 }, { - "epoch": 0.97, - "grad_norm": 2.6886931228239357, - "learning_rate": 3.083535028010343e-08, - "loss": 0.6453, + "epoch": 0.65, + "grad_norm": 0.7355554226232269, + "learning_rate": 2.8650330451010557e-06, + "loss": 0.427, "step": 9177 }, { - "epoch": 0.97, - "grad_norm": 5.884853895703779, - "learning_rate": 3.064665968898428e-08, - "loss": 0.7053, + "epoch": 0.65, + "grad_norm": 1.7938501530899296, + "learning_rate": 2.863993961815726e-06, + "loss": 0.538, "step": 9178 }, { - "epoch": 0.97, - "grad_norm": 3.1113257956144733, - "learning_rate": 3.045854642297175e-08, - "loss": 0.5508, + "epoch": 0.65, + "grad_norm": 1.6962142045092483, + "learning_rate": 2.862954991361471e-06, + "loss": 0.49, "step": 9179 }, { - "epoch": 0.97, - "grad_norm": 2.493207760394977, - "learning_rate": 3.0271010503918896e-08, - "loss": 0.5488, + "epoch": 0.65, + "grad_norm": 2.309038951121496, + "learning_rate": 2.861916133793173e-06, + "loss": 0.5223, "step": 9180 }, { - "epoch": 0.97, - "grad_norm": 2.3766479319028724, - "learning_rate": 3.0084051953614414e-08, - "loss": 0.6282, + "epoch": 0.65, + "grad_norm": 1.9297972013347495, + "learning_rate": 2.860877389165714e-06, + "loss": 0.5564, "step": 9181 }, { - "epoch": 0.97, - "grad_norm": 2.390922910780254, - "learning_rate": 2.98976707937787e-08, - "loss": 0.5949, + "epoch": 0.65, + "grad_norm": 1.7934832808001466, + "learning_rate": 2.8598387575339564e-06, + "loss": 0.5292, "step": 9182 }, { - "epoch": 0.97, - "grad_norm": 4.637454787285773, - "learning_rate": 2.971186704606388e-08, - "loss": 0.6665, + "epoch": 0.65, + "grad_norm": 1.7300594203134563, + "learning_rate": 2.85880023895277e-06, + "loss": 0.4593, "step": 9183 }, { - "epoch": 0.97, - "grad_norm": 3.1213890226656327, - "learning_rate": 2.9526640732056577e-08, - "loss": 0.51, + "epoch": 0.65, + "grad_norm": 1.8250566388660086, + "learning_rate": 2.857761833477007e-06, + "loss": 0.4788, "step": 9184 }, { - "epoch": 0.97, - "grad_norm": 2.490219535861415, - "learning_rate": 2.9341991873276244e-08, - "loss": 0.6355, + "epoch": 0.65, + "grad_norm": 2.587926903641795, + "learning_rate": 2.8567235411615265e-06, + "loss": 0.5377, "step": 9185 }, { - "epoch": 0.97, - "grad_norm": 2.218227767506622, - "learning_rate": 2.9157920491174606e-08, - "loss": 0.683, + "epoch": 0.65, + "grad_norm": 2.076744444427647, + "learning_rate": 2.8556853620611686e-06, + "loss": 0.4886, "step": 9186 }, { - "epoch": 0.97, - "grad_norm": 2.2005059135053537, - "learning_rate": 2.8974426607136784e-08, - "loss": 0.5855, + "epoch": 0.65, + "grad_norm": 1.8902720984402064, + "learning_rate": 2.854647296230776e-06, + "loss": 0.5257, "step": 9187 }, { - "epoch": 0.97, - "grad_norm": 2.519798208810904, - "learning_rate": 2.8791510242480168e-08, - "loss": 0.6008, + "epoch": 0.65, + "grad_norm": 1.7051974394668763, + "learning_rate": 2.853609343725183e-06, + "loss": 0.4931, "step": 9188 }, { - "epoch": 0.97, - "grad_norm": 1.0120446604343019, - "learning_rate": 2.8609171418454985e-08, - "loss": 0.5326, + "epoch": 0.65, + "grad_norm": 2.4742301361047168, + "learning_rate": 2.852571504599217e-06, + "loss": 0.5173, "step": 9189 }, { - "epoch": 0.97, - "grad_norm": 2.450219497355183, - "learning_rate": 2.842741015624595e-08, - "loss": 0.6363, + "epoch": 0.65, + "grad_norm": 1.6856630772661272, + "learning_rate": 2.8515337789077026e-06, + "loss": 0.5099, "step": 9190 }, { - "epoch": 0.97, - "grad_norm": 3.052648319059015, - "learning_rate": 2.824622647696895e-08, - "loss": 0.5779, + "epoch": 0.65, + "grad_norm": 5.268095978659321, + "learning_rate": 2.850496166705452e-06, + "loss": 0.4601, "step": 9191 }, { - "epoch": 0.97, - "grad_norm": 3.335643356301621, - "learning_rate": 2.8065620401673823e-08, - "loss": 0.5543, + "epoch": 0.65, + "grad_norm": 3.41932900636466, + "learning_rate": 2.849458668047278e-06, + "loss": 0.5099, "step": 9192 }, { - "epoch": 0.97, - "grad_norm": 2.8344656986626906, - "learning_rate": 2.7885591951342104e-08, - "loss": 0.549, + "epoch": 0.65, + "grad_norm": 1.8355156684447664, + "learning_rate": 2.848421282987983e-06, + "loss": 0.4698, "step": 9193 }, { - "epoch": 0.97, - "grad_norm": 3.599346650297371, - "learning_rate": 2.77061411468893e-08, - "loss": 0.6037, + "epoch": 0.65, + "grad_norm": 1.7160351651580157, + "learning_rate": 2.8473840115823675e-06, + "loss": 0.5697, "step": 9194 }, { - "epoch": 0.97, - "grad_norm": 3.0866091554980106, - "learning_rate": 2.752726800916372e-08, - "loss": 0.5579, + "epoch": 0.65, + "grad_norm": 1.69471406297245, + "learning_rate": 2.8463468538852225e-06, + "loss": 0.5311, "step": 9195 }, { - "epoch": 0.97, - "grad_norm": 2.5707910612614655, - "learning_rate": 2.734897255894653e-08, - "loss": 0.6654, + "epoch": 0.65, + "grad_norm": 1.9812985813261297, + "learning_rate": 2.845309809951333e-06, + "loss": 0.5336, "step": 9196 }, { - "epoch": 0.97, - "grad_norm": 2.5872290746708435, - "learning_rate": 2.71712548169506e-08, - "loss": 0.6281, + "epoch": 0.65, + "grad_norm": 1.7245171239765553, + "learning_rate": 2.8442728798354837e-06, + "loss": 0.5125, "step": 9197 }, { - "epoch": 0.97, - "grad_norm": 3.2731949836335774, - "learning_rate": 2.6994114803823858e-08, - "loss": 0.6685, + "epoch": 0.65, + "grad_norm": 2.171097880203168, + "learning_rate": 2.8432360635924434e-06, + "loss": 0.5737, "step": 9198 }, { - "epoch": 0.97, - "grad_norm": 2.899660903128125, - "learning_rate": 2.6817552540144842e-08, - "loss": 0.6137, + "epoch": 0.65, + "grad_norm": 1.9059421697353383, + "learning_rate": 2.8421993612769817e-06, + "loss": 0.4865, "step": 9199 }, { - "epoch": 0.97, - "grad_norm": 2.320763158061146, - "learning_rate": 2.6641568046427146e-08, - "loss": 0.6083, + "epoch": 0.65, + "grad_norm": 1.775668768457437, + "learning_rate": 2.8411627729438616e-06, + "loss": 0.4951, "step": 9200 }, { - "epoch": 0.97, - "grad_norm": 2.363696210436481, - "learning_rate": 2.646616134311497e-08, - "loss": 0.6219, + "epoch": 0.65, + "grad_norm": 1.6478315936015098, + "learning_rate": 2.8401262986478396e-06, + "loss": 0.4924, "step": 9201 }, { - "epoch": 0.97, - "grad_norm": 2.2956123393115724, - "learning_rate": 2.629133245058757e-08, - "loss": 0.6388, + "epoch": 0.65, + "grad_norm": 1.846348492591966, + "learning_rate": 2.8390899384436667e-06, + "loss": 0.5, "step": 9202 }, { - "epoch": 0.97, - "grad_norm": 3.8842450507583464, - "learning_rate": 2.6117081389155362e-08, - "loss": 0.6312, + "epoch": 0.65, + "grad_norm": 1.752893292083521, + "learning_rate": 2.838053692386081e-06, + "loss": 0.4914, "step": 9203 }, { - "epoch": 0.97, - "grad_norm": 2.3338099016770926, - "learning_rate": 2.5943408179062713e-08, - "loss": 0.5896, + "epoch": 0.65, + "grad_norm": 1.6927451075127622, + "learning_rate": 2.8370175605298296e-06, + "loss": 0.5579, "step": 9204 }, { - "epoch": 0.97, - "grad_norm": 2.3320839464277805, - "learning_rate": 2.5770312840486255e-08, - "loss": 0.6366, + "epoch": 0.65, + "grad_norm": 1.7841354076107514, + "learning_rate": 2.835981542929638e-06, + "loss": 0.5686, "step": 9205 }, { - "epoch": 0.97, - "grad_norm": 2.984624737573807, - "learning_rate": 2.5597795393536017e-08, - "loss": 0.6062, + "epoch": 0.65, + "grad_norm": 1.7380021488273631, + "learning_rate": 2.834945639640235e-06, + "loss": 0.5407, "step": 9206 }, { - "epoch": 0.97, - "grad_norm": 0.9651826359687896, - "learning_rate": 2.5425855858253744e-08, - "loss": 0.5126, + "epoch": 0.65, + "grad_norm": 2.3258914948572307, + "learning_rate": 2.833909850716336e-06, + "loss": 0.4976, "step": 9207 }, { - "epoch": 0.97, - "grad_norm": 2.288749577144304, - "learning_rate": 2.5254494254616236e-08, - "loss": 0.6235, + "epoch": 0.65, + "grad_norm": 1.8730161499635014, + "learning_rate": 2.8328741762126607e-06, + "loss": 0.5397, "step": 9208 }, { - "epoch": 0.97, - "grad_norm": 2.813589985884465, - "learning_rate": 2.5083710602530897e-08, - "loss": 0.6208, + "epoch": 0.65, + "grad_norm": 1.7299205883915294, + "learning_rate": 2.8318386161839163e-06, + "loss": 0.5241, "step": 9209 }, { - "epoch": 0.97, - "grad_norm": 2.407381015638563, - "learning_rate": 2.4913504921839084e-08, - "loss": 0.5145, + "epoch": 0.65, + "grad_norm": 1.6845094122112925, + "learning_rate": 2.8308031706847994e-06, + "loss": 0.4841, "step": 9210 }, { - "epoch": 0.97, - "grad_norm": 2.2924984215742583, - "learning_rate": 2.4743877232314416e-08, - "loss": 0.5947, + "epoch": 0.65, + "grad_norm": 1.5731515300190162, + "learning_rate": 2.829767839770012e-06, + "loss": 0.5358, "step": 9211 }, { - "epoch": 0.97, - "grad_norm": 3.1338707036223505, - "learning_rate": 2.457482755366447e-08, - "loss": 0.5781, + "epoch": 0.65, + "grad_norm": 1.5947759187815131, + "learning_rate": 2.8287326234942403e-06, + "loss": 0.5638, "step": 9212 }, { - "epoch": 0.97, - "grad_norm": 2.277076857886843, - "learning_rate": 2.4406355905528534e-08, - "loss": 0.6377, + "epoch": 0.65, + "grad_norm": 0.8152013611848146, + "learning_rate": 2.8276975219121705e-06, + "loss": 0.4498, "step": 9213 }, { - "epoch": 0.97, - "grad_norm": 2.5947856777870917, - "learning_rate": 2.4238462307478727e-08, - "loss": 0.6077, + "epoch": 0.65, + "grad_norm": 1.633436745150401, + "learning_rate": 2.826662535078474e-06, + "loss": 0.5167, "step": 9214 }, { - "epoch": 0.97, - "grad_norm": 2.869549468783701, - "learning_rate": 2.4071146779021116e-08, - "loss": 0.5939, + "epoch": 0.65, + "grad_norm": 3.0306683179296225, + "learning_rate": 2.825627663047831e-06, + "loss": 0.5616, "step": 9215 }, { - "epoch": 0.97, - "grad_norm": 2.5062122517145817, - "learning_rate": 2.3904409339594036e-08, - "loss": 0.5345, + "epoch": 0.65, + "grad_norm": 0.6581517902793548, + "learning_rate": 2.8245929058749004e-06, + "loss": 0.4238, "step": 9216 }, { - "epoch": 0.97, - "grad_norm": 2.9256223760450024, - "learning_rate": 2.373825000856811e-08, - "loss": 0.5867, + "epoch": 0.65, + "grad_norm": 2.3908531170067024, + "learning_rate": 2.823558263614344e-06, + "loss": 0.5086, "step": 9217 }, { - "epoch": 0.97, - "grad_norm": 2.919362951808869, - "learning_rate": 2.3572668805247335e-08, - "loss": 0.6491, + "epoch": 0.65, + "grad_norm": 1.7372537977656668, + "learning_rate": 2.8225237363208148e-06, + "loss": 0.5054, "step": 9218 }, { - "epoch": 0.97, - "grad_norm": 2.6991155767439214, - "learning_rate": 2.3407665748868548e-08, - "loss": 0.5601, + "epoch": 0.65, + "grad_norm": 1.6977616383481866, + "learning_rate": 2.82148932404896e-06, + "loss": 0.5042, "step": 9219 }, { - "epoch": 0.97, - "grad_norm": 2.246298603916671, - "learning_rate": 2.324324085860086e-08, - "loss": 0.5792, + "epoch": 0.65, + "grad_norm": 1.9320238236481364, + "learning_rate": 2.820455026853423e-06, + "loss": 0.6162, "step": 9220 }, { - "epoch": 0.97, - "grad_norm": 2.4031521640333504, - "learning_rate": 2.3079394153547874e-08, - "loss": 0.6562, + "epoch": 0.65, + "grad_norm": 1.6694785564525654, + "learning_rate": 2.819420844788835e-06, + "loss": 0.5954, "step": 9221 }, { - "epoch": 0.97, - "grad_norm": 2.4774885599982843, - "learning_rate": 2.2916125652743814e-08, - "loss": 0.5821, + "epoch": 0.65, + "grad_norm": 1.7541670210174858, + "learning_rate": 2.8183867779098262e-06, + "loss": 0.515, "step": 9222 }, { - "epoch": 0.97, - "grad_norm": 2.9008654534964395, - "learning_rate": 2.2753435375156284e-08, - "loss": 0.6582, + "epoch": 0.65, + "grad_norm": 1.734812944385469, + "learning_rate": 2.817352826271021e-06, + "loss": 0.493, "step": 9223 }, { - "epoch": 0.97, - "grad_norm": 6.023778464208766, - "learning_rate": 2.2591323339687387e-08, - "loss": 0.6167, + "epoch": 0.65, + "grad_norm": 1.9359575841068561, + "learning_rate": 2.816318989927035e-06, + "loss": 0.4903, "step": 9224 }, { - "epoch": 0.97, - "grad_norm": 2.3713659550667208, - "learning_rate": 2.242978956517039e-08, - "loss": 0.6183, + "epoch": 0.65, + "grad_norm": 1.7424749149611731, + "learning_rate": 2.8152852689324807e-06, + "loss": 0.4928, "step": 9225 }, { - "epoch": 0.97, - "grad_norm": 2.306655048988526, - "learning_rate": 2.2268834070371946e-08, - "loss": 0.6889, + "epoch": 0.65, + "grad_norm": 1.9194836941090903, + "learning_rate": 2.814251663341958e-06, + "loss": 0.4957, "step": 9226 }, { - "epoch": 0.97, - "grad_norm": 3.0952640364000117, - "learning_rate": 2.210845687399099e-08, - "loss": 0.5602, + "epoch": 0.65, + "grad_norm": 1.8364464075597358, + "learning_rate": 2.813218173210072e-06, + "loss": 0.5366, "step": 9227 }, { - "epoch": 0.97, - "grad_norm": 2.3413653802222494, - "learning_rate": 2.1948657994659838e-08, - "loss": 0.5923, + "epoch": 0.65, + "grad_norm": 2.170775762020465, + "learning_rate": 2.8121847985914108e-06, + "loss": 0.6025, "step": 9228 }, { - "epoch": 0.97, - "grad_norm": 2.77564140602996, - "learning_rate": 2.1789437450943084e-08, - "loss": 0.5727, + "epoch": 0.65, + "grad_norm": 1.7052730735353285, + "learning_rate": 2.8111515395405615e-06, + "loss": 0.5543, "step": 9229 }, { - "epoch": 0.97, - "grad_norm": 2.421552954655445, - "learning_rate": 2.163079526133982e-08, - "loss": 0.611, + "epoch": 0.65, + "grad_norm": 1.792400223004841, + "learning_rate": 2.8101183961121046e-06, + "loss": 0.5497, "step": 9230 }, { - "epoch": 0.97, - "grad_norm": 2.169302617388929, - "learning_rate": 2.1472731444279193e-08, - "loss": 0.5952, + "epoch": 0.66, + "grad_norm": 1.4631512126909236, + "learning_rate": 2.8090853683606143e-06, + "loss": 0.5033, "step": 9231 }, { - "epoch": 0.97, - "grad_norm": 2.101895600351323, - "learning_rate": 2.13152460181254e-08, - "loss": 0.6372, + "epoch": 0.66, + "grad_norm": 1.9356325420576315, + "learning_rate": 2.8080524563406607e-06, + "loss": 0.5572, "step": 9232 }, { - "epoch": 0.97, - "grad_norm": 3.657299806141878, - "learning_rate": 2.115833900117381e-08, - "loss": 0.6147, + "epoch": 0.66, + "grad_norm": 1.7417291538556863, + "learning_rate": 2.8070196601068e-06, + "loss": 0.4954, "step": 9233 }, { - "epoch": 0.97, - "grad_norm": 2.239460891177711, - "learning_rate": 2.1002010411654838e-08, - "loss": 0.591, + "epoch": 0.66, + "grad_norm": 1.826998556068702, + "learning_rate": 2.805986979713596e-06, + "loss": 0.6107, "step": 9234 }, { - "epoch": 0.97, - "grad_norm": 3.488691890857634, - "learning_rate": 2.0846260267728957e-08, - "loss": 0.6899, + "epoch": 0.66, + "grad_norm": 1.7365963405401088, + "learning_rate": 2.8049544152155912e-06, + "loss": 0.4931, "step": 9235 }, { - "epoch": 0.97, - "grad_norm": 2.1238152317575727, - "learning_rate": 2.069108858749169e-08, - "loss": 0.6646, + "epoch": 0.66, + "grad_norm": 1.7476107889090398, + "learning_rate": 2.803921966667332e-06, + "loss": 0.5037, "step": 9236 }, { - "epoch": 0.97, - "grad_norm": 2.487320361580777, - "learning_rate": 2.0536495388969734e-08, - "loss": 0.6297, + "epoch": 0.66, + "grad_norm": 1.7902239574645038, + "learning_rate": 2.802889634123357e-06, + "loss": 0.5386, "step": 9237 }, { - "epoch": 0.97, - "grad_norm": 2.341768489528419, - "learning_rate": 2.0382480690123718e-08, - "loss": 0.472, + "epoch": 0.66, + "grad_norm": 1.8428493787010312, + "learning_rate": 2.801857417638195e-06, + "loss": 0.4752, "step": 9238 }, { - "epoch": 0.97, - "grad_norm": 3.233011462575182, - "learning_rate": 2.0229044508845997e-08, - "loss": 0.6539, + "epoch": 0.66, + "grad_norm": 1.4974175025634846, + "learning_rate": 2.800825317266376e-06, + "loss": 0.4931, "step": 9239 }, { - "epoch": 0.97, - "grad_norm": 2.121493412352339, - "learning_rate": 2.007618686296342e-08, - "loss": 0.5738, + "epoch": 0.66, + "grad_norm": 1.9900314212309922, + "learning_rate": 2.7997933330624116e-06, + "loss": 0.5013, "step": 9240 }, { - "epoch": 0.97, - "grad_norm": 0.9581343879954742, - "learning_rate": 1.9923907770233453e-08, - "loss": 0.5091, + "epoch": 0.66, + "grad_norm": 1.832737043728728, + "learning_rate": 2.7987614650808206e-06, + "loss": 0.5972, "step": 9241 }, { - "epoch": 0.97, - "grad_norm": 2.403223954181605, - "learning_rate": 1.9772207248348607e-08, - "loss": 0.6106, + "epoch": 0.66, + "grad_norm": 1.6351588349335633, + "learning_rate": 2.797729713376106e-06, + "loss": 0.514, "step": 9242 }, { - "epoch": 0.97, - "grad_norm": 0.8777448291544866, - "learning_rate": 1.962108531493201e-08, - "loss": 0.5469, + "epoch": 0.66, + "grad_norm": 1.6632863927196342, + "learning_rate": 2.7966980780027735e-06, + "loss": 0.4879, "step": 9243 }, { - "epoch": 0.97, - "grad_norm": 2.9765736631382067, - "learning_rate": 1.9470541987540727e-08, - "loss": 0.5947, + "epoch": 0.66, + "grad_norm": 1.5743530225730007, + "learning_rate": 2.7956665590153102e-06, + "loss": 0.4898, "step": 9244 }, { - "epoch": 0.97, - "grad_norm": 3.5869579127381717, - "learning_rate": 1.9320577283664656e-08, - "loss": 0.6733, + "epoch": 0.66, + "grad_norm": 1.7619056937780793, + "learning_rate": 2.7946351564682105e-06, + "loss": 0.4859, "step": 9245 }, { - "epoch": 0.97, - "grad_norm": 2.7935438970542363, - "learning_rate": 1.9171191220726527e-08, - "loss": 0.5995, + "epoch": 0.66, + "grad_norm": 1.9704870364714537, + "learning_rate": 2.7936038704159564e-06, + "loss": 0.5519, "step": 9246 }, { - "epoch": 0.97, - "grad_norm": 2.928327785902899, - "learning_rate": 1.902238381608079e-08, - "loss": 0.5998, + "epoch": 0.66, + "grad_norm": 1.8182461740118228, + "learning_rate": 2.7925727009130204e-06, + "loss": 0.4593, "step": 9247 }, { - "epoch": 0.97, - "grad_norm": 3.4791238402015456, - "learning_rate": 1.88741550870164e-08, - "loss": 0.5935, + "epoch": 0.66, + "grad_norm": 1.8414737536759742, + "learning_rate": 2.791541648013874e-06, + "loss": 0.4474, "step": 9248 }, { - "epoch": 0.97, - "grad_norm": 2.863133326433092, - "learning_rate": 1.8726505050753464e-08, - "loss": 0.5795, + "epoch": 0.66, + "grad_norm": 0.6468683569543854, + "learning_rate": 2.7905107117729813e-06, + "loss": 0.418, "step": 9249 }, { - "epoch": 0.97, - "grad_norm": 4.0631417802895955, - "learning_rate": 1.8579433724446037e-08, - "loss": 0.5772, + "epoch": 0.66, + "grad_norm": 1.5676782973967742, + "learning_rate": 2.7894798922448006e-06, + "loss": 0.5688, "step": 9250 }, { - "epoch": 0.97, - "grad_norm": 2.2432282328723803, - "learning_rate": 1.8432941125179904e-08, - "loss": 0.5114, + "epoch": 0.66, + "grad_norm": 1.8083141886729648, + "learning_rate": 2.788449189483782e-06, + "loss": 0.5697, "step": 9251 }, { - "epoch": 0.97, - "grad_norm": 2.5097002070968535, - "learning_rate": 1.8287027269974777e-08, - "loss": 0.6283, + "epoch": 0.66, + "grad_norm": 2.1331555849335153, + "learning_rate": 2.7874186035443695e-06, + "loss": 0.5697, "step": 9252 }, { - "epoch": 0.97, - "grad_norm": 2.415419955262919, - "learning_rate": 1.81416921757821e-08, - "loss": 0.6065, + "epoch": 0.66, + "grad_norm": 1.4770902294896449, + "learning_rate": 2.786388134481004e-06, + "loss": 0.4777, "step": 9253 }, + { + "epoch": 0.66, + "grad_norm": 0.711291720105949, + "learning_rate": 2.7853577823481183e-06, + "loss": 0.4181, + "step": 9254 + }, + { + "epoch": 0.66, + "grad_norm": 1.9038756772823697, + "learning_rate": 2.78432754720014e-06, + "loss": 0.6013, + "step": 9255 + }, + { + "epoch": 0.66, + "grad_norm": 0.6938828438640092, + "learning_rate": 2.783297429091485e-06, + "loss": 0.4236, + "step": 9256 + }, + { + "epoch": 0.66, + "grad_norm": 1.7582971766812825, + "learning_rate": 2.7822674280765748e-06, + "loss": 0.4507, + "step": 9257 + }, + { + "epoch": 0.66, + "grad_norm": 1.572472733266068, + "learning_rate": 2.7812375442098117e-06, + "loss": 0.435, + "step": 9258 + }, + { + "epoch": 0.66, + "grad_norm": 2.115866685954038, + "learning_rate": 2.7802077775456e-06, + "loss": 0.5994, + "step": 9259 + }, + { + "epoch": 0.66, + "grad_norm": 0.7813877281360558, + "learning_rate": 2.779178128138335e-06, + "loss": 0.4074, + "step": 9260 + }, + { + "epoch": 0.66, + "grad_norm": 1.8277438162168547, + "learning_rate": 2.7781485960424065e-06, + "loss": 0.4952, + "step": 9261 + }, + { + "epoch": 0.66, + "grad_norm": 1.713533068768173, + "learning_rate": 2.7771191813122e-06, + "loss": 0.5953, + "step": 9262 + }, + { + "epoch": 0.66, + "grad_norm": 1.642190276930909, + "learning_rate": 2.776089884002086e-06, + "loss": 0.5313, + "step": 9263 + }, + { + "epoch": 0.66, + "grad_norm": 1.6950645395268544, + "learning_rate": 2.7750607041664446e-06, + "loss": 0.5733, + "step": 9264 + }, + { + "epoch": 0.66, + "grad_norm": 1.8139080282852396, + "learning_rate": 2.7740316418596338e-06, + "loss": 0.5558, + "step": 9265 + }, + { + "epoch": 0.66, + "grad_norm": 1.6721247076595647, + "learning_rate": 2.7730026971360135e-06, + "loss": 0.5896, + "step": 9266 + }, + { + "epoch": 0.66, + "grad_norm": 1.6511017127903675, + "learning_rate": 2.771973870049938e-06, + "loss": 0.481, + "step": 9267 + }, + { + "epoch": 0.66, + "grad_norm": 1.9303518769716375, + "learning_rate": 2.7709451606557512e-06, + "loss": 0.5597, + "step": 9268 + }, + { + "epoch": 0.66, + "grad_norm": 1.8906158271664704, + "learning_rate": 2.7699165690077967e-06, + "loss": 0.4665, + "step": 9269 + }, + { + "epoch": 0.66, + "grad_norm": 1.4990544713190823, + "learning_rate": 2.768888095160403e-06, + "loss": 0.5209, + "step": 9270 + }, + { + "epoch": 0.66, + "grad_norm": 1.8522669652814832, + "learning_rate": 2.7678597391679005e-06, + "loss": 0.4965, + "step": 9271 + }, + { + "epoch": 0.66, + "grad_norm": 2.2846770621647994, + "learning_rate": 2.7668315010846103e-06, + "loss": 0.5321, + "step": 9272 + }, + { + "epoch": 0.66, + "grad_norm": 2.0553969485821004, + "learning_rate": 2.765803380964847e-06, + "loss": 0.51, + "step": 9273 + }, + { + "epoch": 0.66, + "grad_norm": 2.260029541259063, + "learning_rate": 2.7647753788629195e-06, + "loss": 0.5643, + "step": 9274 + }, + { + "epoch": 0.66, + "grad_norm": 1.723609956085677, + "learning_rate": 2.763747494833131e-06, + "loss": 0.5034, + "step": 9275 + }, + { + "epoch": 0.66, + "grad_norm": 1.9499304426733872, + "learning_rate": 2.762719728929779e-06, + "loss": 0.5285, + "step": 9276 + }, + { + "epoch": 0.66, + "grad_norm": 1.8133685210442814, + "learning_rate": 2.7616920812071497e-06, + "loss": 0.5507, + "step": 9277 + }, + { + "epoch": 0.66, + "grad_norm": 1.5803550019868808, + "learning_rate": 2.760664551719529e-06, + "loss": 0.5437, + "step": 9278 + }, + { + "epoch": 0.66, + "grad_norm": 2.1062895399459682, + "learning_rate": 2.7596371405211953e-06, + "loss": 0.4909, + "step": 9279 + }, + { + "epoch": 0.66, + "grad_norm": 2.212459526923453, + "learning_rate": 2.75860984766642e-06, + "loss": 0.5346, + "step": 9280 + }, + { + "epoch": 0.66, + "grad_norm": 1.6612916414737877, + "learning_rate": 2.7575826732094664e-06, + "loss": 0.5078, + "step": 9281 + }, + { + "epoch": 0.66, + "grad_norm": 1.7995843503628777, + "learning_rate": 2.756555617204594e-06, + "loss": 0.5555, + "step": 9282 + }, + { + "epoch": 0.66, + "grad_norm": 2.109575929476647, + "learning_rate": 2.7555286797060554e-06, + "loss": 0.5772, + "step": 9283 + }, + { + "epoch": 0.66, + "grad_norm": 0.7994388654430824, + "learning_rate": 2.754501860768098e-06, + "loss": 0.4452, + "step": 9284 + }, + { + "epoch": 0.66, + "grad_norm": 2.075919769946844, + "learning_rate": 2.7534751604449626e-06, + "loss": 0.5676, + "step": 9285 + }, + { + "epoch": 0.66, + "grad_norm": 1.756488185789471, + "learning_rate": 2.752448578790878e-06, + "loss": 0.5328, + "step": 9286 + }, + { + "epoch": 0.66, + "grad_norm": 1.944593874313746, + "learning_rate": 2.7514221158600784e-06, + "loss": 0.5183, + "step": 9287 + }, + { + "epoch": 0.66, + "grad_norm": 1.525239231754289, + "learning_rate": 2.7503957717067808e-06, + "loss": 0.4971, + "step": 9288 + }, + { + "epoch": 0.66, + "grad_norm": 2.417064782487489, + "learning_rate": 2.7493695463852007e-06, + "loss": 0.4959, + "step": 9289 + }, + { + "epoch": 0.66, + "grad_norm": 1.686435627184048, + "learning_rate": 2.748343439949547e-06, + "loss": 0.5723, + "step": 9290 + }, + { + "epoch": 0.66, + "grad_norm": 1.6502132597590549, + "learning_rate": 2.747317452454023e-06, + "loss": 0.506, + "step": 9291 + }, + { + "epoch": 0.66, + "grad_norm": 2.7640331740878117, + "learning_rate": 2.746291583952826e-06, + "loss": 0.5243, + "step": 9292 + }, + { + "epoch": 0.66, + "grad_norm": 1.6205110126490547, + "learning_rate": 2.7452658345001405e-06, + "loss": 0.5256, + "step": 9293 + }, + { + "epoch": 0.66, + "grad_norm": 1.4636302116446764, + "learning_rate": 2.744240204150157e-06, + "loss": 0.5714, + "step": 9294 + }, + { + "epoch": 0.66, + "grad_norm": 1.7797221685885345, + "learning_rate": 2.743214692957048e-06, + "loss": 0.6032, + "step": 9295 + }, + { + "epoch": 0.66, + "grad_norm": 1.4560745076320603, + "learning_rate": 2.742189300974986e-06, + "loss": 0.5685, + "step": 9296 + }, + { + "epoch": 0.66, + "grad_norm": 1.789475038177261, + "learning_rate": 2.7411640282581355e-06, + "loss": 0.5125, + "step": 9297 + }, + { + "epoch": 0.66, + "grad_norm": 1.7508253438300656, + "learning_rate": 2.740138874860655e-06, + "loss": 0.5704, + "step": 9298 + }, + { + "epoch": 0.66, + "grad_norm": 1.8482259702939492, + "learning_rate": 2.739113840836699e-06, + "loss": 0.5827, + "step": 9299 + }, + { + "epoch": 0.66, + "grad_norm": 1.5229068750500316, + "learning_rate": 2.7380889262404082e-06, + "loss": 0.5205, + "step": 9300 + }, + { + "epoch": 0.66, + "grad_norm": 1.874173951215821, + "learning_rate": 2.737064131125925e-06, + "loss": 0.5666, + "step": 9301 + }, + { + "epoch": 0.66, + "grad_norm": 2.0662724707522493, + "learning_rate": 2.7360394555473826e-06, + "loss": 0.5832, + "step": 9302 + }, + { + "epoch": 0.66, + "grad_norm": 2.067854630452239, + "learning_rate": 2.7350148995589067e-06, + "loss": 0.5867, + "step": 9303 + }, + { + "epoch": 0.66, + "grad_norm": 1.911388843056198, + "learning_rate": 2.7339904632146206e-06, + "loss": 0.5259, + "step": 9304 + }, + { + "epoch": 0.66, + "grad_norm": 2.0834183385996417, + "learning_rate": 2.7329661465686337e-06, + "loss": 0.4833, + "step": 9305 + }, + { + "epoch": 0.66, + "grad_norm": 2.0845536322063714, + "learning_rate": 2.73194194967506e-06, + "loss": 0.4826, + "step": 9306 + }, + { + "epoch": 0.66, + "grad_norm": 1.8636226624950012, + "learning_rate": 2.730917872587996e-06, + "loss": 0.5509, + "step": 9307 + }, + { + "epoch": 0.66, + "grad_norm": 0.6800080943890687, + "learning_rate": 2.7298939153615378e-06, + "loss": 0.4178, + "step": 9308 + }, + { + "epoch": 0.66, + "grad_norm": 1.8532234980719897, + "learning_rate": 2.728870078049776e-06, + "loss": 0.5124, + "step": 9309 + }, + { + "epoch": 0.66, + "grad_norm": 1.96553799181879, + "learning_rate": 2.727846360706794e-06, + "loss": 0.5035, + "step": 9310 + }, + { + "epoch": 0.66, + "grad_norm": 1.8353900574617938, + "learning_rate": 2.726822763386664e-06, + "loss": 0.5928, + "step": 9311 + }, + { + "epoch": 0.66, + "grad_norm": 1.8941871909253338, + "learning_rate": 2.725799286143457e-06, + "loss": 0.569, + "step": 9312 + }, + { + "epoch": 0.66, + "grad_norm": 1.83019971316929, + "learning_rate": 2.7247759290312415e-06, + "loss": 0.5392, + "step": 9313 + }, + { + "epoch": 0.66, + "grad_norm": 1.5965189125401191, + "learning_rate": 2.7237526921040686e-06, + "loss": 0.5529, + "step": 9314 + }, + { + "epoch": 0.66, + "grad_norm": 2.0681856101171383, + "learning_rate": 2.722729575415994e-06, + "loss": 0.5251, + "step": 9315 + }, + { + "epoch": 0.66, + "grad_norm": 1.625842698559353, + "learning_rate": 2.721706579021055e-06, + "loss": 0.5025, + "step": 9316 + }, + { + "epoch": 0.66, + "grad_norm": 1.9939530437945896, + "learning_rate": 2.720683702973299e-06, + "loss": 0.5313, + "step": 9317 + }, + { + "epoch": 0.66, + "grad_norm": 1.5258914555045733, + "learning_rate": 2.719660947326751e-06, + "loss": 0.5107, + "step": 9318 + }, + { + "epoch": 0.66, + "grad_norm": 1.5693644908111686, + "learning_rate": 2.7186383121354386e-06, + "loss": 0.4954, + "step": 9319 + }, + { + "epoch": 0.66, + "grad_norm": 2.1519957647444548, + "learning_rate": 2.717615797453381e-06, + "loss": 0.5348, + "step": 9320 + }, + { + "epoch": 0.66, + "grad_norm": 1.7871406512818238, + "learning_rate": 2.71659340333459e-06, + "loss": 0.5397, + "step": 9321 + }, + { + "epoch": 0.66, + "grad_norm": 1.6815042267837494, + "learning_rate": 2.7155711298330756e-06, + "loss": 0.5023, + "step": 9322 + }, + { + "epoch": 0.66, + "grad_norm": 1.6826848128405918, + "learning_rate": 2.7145489770028305e-06, + "loss": 0.5285, + "step": 9323 + }, + { + "epoch": 0.66, + "grad_norm": 1.7264827528897124, + "learning_rate": 2.7135269448978565e-06, + "loss": 0.4265, + "step": 9324 + }, + { + "epoch": 0.66, + "grad_norm": 2.6764825799656937, + "learning_rate": 2.712505033572135e-06, + "loss": 0.4423, + "step": 9325 + }, + { + "epoch": 0.66, + "grad_norm": 0.832978465528351, + "learning_rate": 2.711483243079649e-06, + "loss": 0.4564, + "step": 9326 + }, + { + "epoch": 0.66, + "grad_norm": 1.8611549034013508, + "learning_rate": 2.7104615734743723e-06, + "loss": 0.5406, + "step": 9327 + }, + { + "epoch": 0.66, + "grad_norm": 1.7117965995619921, + "learning_rate": 2.709440024810273e-06, + "loss": 0.5313, + "step": 9328 + }, + { + "epoch": 0.66, + "grad_norm": 1.694823182443523, + "learning_rate": 2.708418597141316e-06, + "loss": 0.4875, + "step": 9329 + }, + { + "epoch": 0.66, + "grad_norm": 1.722797912156979, + "learning_rate": 2.7073972905214507e-06, + "loss": 0.573, + "step": 9330 + }, + { + "epoch": 0.66, + "grad_norm": 1.5285703612318093, + "learning_rate": 2.7063761050046287e-06, + "loss": 0.4905, + "step": 9331 + }, + { + "epoch": 0.66, + "grad_norm": 0.7965520956129154, + "learning_rate": 2.7053550406447936e-06, + "loss": 0.4244, + "step": 9332 + }, + { + "epoch": 0.66, + "grad_norm": 1.8732292708925156, + "learning_rate": 2.70433409749588e-06, + "loss": 0.5485, + "step": 9333 + }, + { + "epoch": 0.66, + "grad_norm": 1.5521628933093334, + "learning_rate": 2.703313275611821e-06, + "loss": 0.5007, + "step": 9334 + }, + { + "epoch": 0.66, + "grad_norm": 1.471029445740381, + "learning_rate": 2.7022925750465324e-06, + "loss": 0.5308, + "step": 9335 + }, + { + "epoch": 0.66, + "grad_norm": 1.580520502223484, + "learning_rate": 2.7012719958539397e-06, + "loss": 0.5548, + "step": 9336 + }, + { + "epoch": 0.66, + "grad_norm": 1.7484873422133842, + "learning_rate": 2.700251538087947e-06, + "loss": 0.5385, + "step": 9337 + }, + { + "epoch": 0.66, + "grad_norm": 1.5348742822199164, + "learning_rate": 2.699231201802461e-06, + "loss": 0.4917, + "step": 9338 + }, + { + "epoch": 0.66, + "grad_norm": 1.7893554788990624, + "learning_rate": 2.698210987051379e-06, + "loss": 0.5363, + "step": 9339 + }, + { + "epoch": 0.66, + "grad_norm": 1.8525944779731645, + "learning_rate": 2.6971908938885927e-06, + "loss": 0.4514, + "step": 9340 + }, + { + "epoch": 0.66, + "grad_norm": 1.8819884842467036, + "learning_rate": 2.696170922367988e-06, + "loss": 0.5048, + "step": 9341 + }, + { + "epoch": 0.66, + "grad_norm": 1.6370273642950401, + "learning_rate": 2.6951510725434373e-06, + "loss": 0.5129, + "step": 9342 + }, + { + "epoch": 0.66, + "grad_norm": 2.310070122054087, + "learning_rate": 2.6941313444688204e-06, + "loss": 0.5006, + "step": 9343 + }, + { + "epoch": 0.66, + "grad_norm": 1.6524659510356103, + "learning_rate": 2.6931117381979975e-06, + "loss": 0.4788, + "step": 9344 + }, + { + "epoch": 0.66, + "grad_norm": 2.0474619403926475, + "learning_rate": 2.692092253784832e-06, + "loss": 0.5272, + "step": 9345 + }, + { + "epoch": 0.66, + "grad_norm": 1.6255214084003808, + "learning_rate": 2.6910728912831694e-06, + "loss": 0.5014, + "step": 9346 + }, + { + "epoch": 0.66, + "grad_norm": 1.7867970489126048, + "learning_rate": 2.690053650746865e-06, + "loss": 0.5607, + "step": 9347 + }, + { + "epoch": 0.66, + "grad_norm": 1.9125258318695169, + "learning_rate": 2.6890345322297517e-06, + "loss": 0.5444, + "step": 9348 + }, + { + "epoch": 0.66, + "grad_norm": 1.7523316153532298, + "learning_rate": 2.688015535785665e-06, + "loss": 0.6388, + "step": 9349 + }, + { + "epoch": 0.66, + "grad_norm": 2.0060448983972936, + "learning_rate": 2.6869966614684316e-06, + "loss": 0.4551, + "step": 9350 + }, + { + "epoch": 0.66, + "grad_norm": 2.2568151691213374, + "learning_rate": 2.685977909331872e-06, + "loss": 0.5394, + "step": 9351 + }, + { + "epoch": 0.66, + "grad_norm": 1.6814779094344485, + "learning_rate": 2.684959279429802e-06, + "loss": 0.5983, + "step": 9352 + }, + { + "epoch": 0.66, + "grad_norm": 1.5371419736291012, + "learning_rate": 2.683940771816026e-06, + "loss": 0.5674, + "step": 9353 + }, + { + "epoch": 0.66, + "grad_norm": 2.2540789291162486, + "learning_rate": 2.682922386544346e-06, + "loss": 0.5012, + "step": 9354 + }, + { + "epoch": 0.66, + "grad_norm": 1.7525923367612886, + "learning_rate": 2.6819041236685567e-06, + "loss": 0.5251, + "step": 9355 + }, + { + "epoch": 0.66, + "grad_norm": 1.7750315002687527, + "learning_rate": 2.6808859832424468e-06, + "loss": 0.5368, + "step": 9356 + }, + { + "epoch": 0.66, + "grad_norm": 2.0989793739943554, + "learning_rate": 2.6798679653197966e-06, + "loss": 0.5852, + "step": 9357 + }, + { + "epoch": 0.66, + "grad_norm": 0.6843911610005086, + "learning_rate": 2.678850069954383e-06, + "loss": 0.4249, + "step": 9358 + }, + { + "epoch": 0.66, + "grad_norm": 1.9690874698783156, + "learning_rate": 2.6778322971999758e-06, + "loss": 0.5316, + "step": 9359 + }, + { + "epoch": 0.66, + "grad_norm": 1.6356944238602544, + "learning_rate": 2.676814647110333e-06, + "loss": 0.5014, + "step": 9360 + }, + { + "epoch": 0.66, + "grad_norm": 1.5107126430462452, + "learning_rate": 2.6757971197392125e-06, + "loss": 0.4957, + "step": 9361 + }, + { + "epoch": 0.66, + "grad_norm": 1.5837786806537586, + "learning_rate": 2.6747797151403635e-06, + "loss": 0.5875, + "step": 9362 + }, + { + "epoch": 0.66, + "grad_norm": 1.936127848414354, + "learning_rate": 2.6737624333675294e-06, + "loss": 0.5112, + "step": 9363 + }, + { + "epoch": 0.66, + "grad_norm": 1.6101981232695142, + "learning_rate": 2.6727452744744477e-06, + "loss": 0.4812, + "step": 9364 + }, + { + "epoch": 0.66, + "grad_norm": 2.567099088883363, + "learning_rate": 2.671728238514842e-06, + "loss": 0.5142, + "step": 9365 + }, + { + "epoch": 0.66, + "grad_norm": 1.8245813232009847, + "learning_rate": 2.6707113255424446e-06, + "loss": 0.5393, + "step": 9366 + }, + { + "epoch": 0.66, + "grad_norm": 1.8684000690400189, + "learning_rate": 2.669694535610965e-06, + "loss": 0.5721, + "step": 9367 + }, + { + "epoch": 0.66, + "grad_norm": 1.7885652942674504, + "learning_rate": 2.6686778687741167e-06, + "loss": 0.5651, + "step": 9368 + }, + { + "epoch": 0.66, + "grad_norm": 1.648879552695787, + "learning_rate": 2.6676613250856034e-06, + "loss": 0.4819, + "step": 9369 + }, + { + "epoch": 0.66, + "grad_norm": 6.763955154289071, + "learning_rate": 2.6666449045991207e-06, + "loss": 0.4791, + "step": 9370 + }, + { + "epoch": 0.66, + "grad_norm": 2.152722224370005, + "learning_rate": 2.6656286073683637e-06, + "loss": 0.439, + "step": 9371 + }, + { + "epoch": 0.67, + "grad_norm": 1.9422984690133138, + "learning_rate": 2.6646124334470093e-06, + "loss": 0.5454, + "step": 9372 + }, + { + "epoch": 0.67, + "grad_norm": 2.9126048547479577, + "learning_rate": 2.6635963828887434e-06, + "loss": 0.4539, + "step": 9373 + }, + { + "epoch": 0.67, + "grad_norm": 1.4969257087155272, + "learning_rate": 2.6625804557472314e-06, + "loss": 0.5012, + "step": 9374 + }, + { + "epoch": 0.67, + "grad_norm": 1.8691573805423851, + "learning_rate": 2.6615646520761416e-06, + "loss": 0.5924, + "step": 9375 + }, + { + "epoch": 0.67, + "grad_norm": 1.707156075484976, + "learning_rate": 2.660548971929127e-06, + "loss": 0.5168, + "step": 9376 + }, + { + "epoch": 0.67, + "grad_norm": 1.7506782915511374, + "learning_rate": 2.659533415359845e-06, + "loss": 0.5233, + "step": 9377 + }, + { + "epoch": 0.67, + "grad_norm": 3.6306477722123436, + "learning_rate": 2.6585179824219404e-06, + "loss": 0.5731, + "step": 9378 + }, + { + "epoch": 0.67, + "grad_norm": 1.5320841232970568, + "learning_rate": 2.6575026731690477e-06, + "loss": 0.5387, + "step": 9379 + }, + { + "epoch": 0.67, + "grad_norm": 1.9250511403363815, + "learning_rate": 2.6564874876548017e-06, + "loss": 0.5901, + "step": 9380 + }, + { + "epoch": 0.67, + "grad_norm": 2.0367193087793094, + "learning_rate": 2.655472425932828e-06, + "loss": 0.5915, + "step": 9381 + }, + { + "epoch": 0.67, + "grad_norm": 1.7628790226768085, + "learning_rate": 2.6544574880567462e-06, + "loss": 0.5419, + "step": 9382 + }, + { + "epoch": 0.67, + "grad_norm": 1.7755247051803598, + "learning_rate": 2.6534426740801656e-06, + "loss": 0.5662, + "step": 9383 + }, + { + "epoch": 0.67, + "grad_norm": 1.9728866560893619, + "learning_rate": 2.6524279840566945e-06, + "loss": 0.5328, + "step": 9384 + }, + { + "epoch": 0.67, + "grad_norm": 1.6309847498070371, + "learning_rate": 2.651413418039932e-06, + "loss": 0.4825, + "step": 9385 + }, + { + "epoch": 0.67, + "grad_norm": 1.7754392779482857, + "learning_rate": 2.6503989760834715e-06, + "loss": 0.4626, + "step": 9386 + }, + { + "epoch": 0.67, + "grad_norm": 1.5216796274296036, + "learning_rate": 2.649384658240898e-06, + "loss": 0.4962, + "step": 9387 + }, + { + "epoch": 0.67, + "grad_norm": 2.483726980438515, + "learning_rate": 2.6483704645657917e-06, + "loss": 0.4461, + "step": 9388 + }, + { + "epoch": 0.67, + "grad_norm": 1.972131420989064, + "learning_rate": 2.6473563951117276e-06, + "loss": 0.5476, + "step": 9389 + }, + { + "epoch": 0.67, + "grad_norm": 1.6762656868956984, + "learning_rate": 2.6463424499322687e-06, + "loss": 0.5107, + "step": 9390 + }, + { + "epoch": 0.67, + "grad_norm": 0.708621822418427, + "learning_rate": 2.645328629080977e-06, + "loss": 0.4317, + "step": 9391 + }, + { + "epoch": 0.67, + "grad_norm": 2.165464534176344, + "learning_rate": 2.644314932611406e-06, + "loss": 0.4934, + "step": 9392 + }, + { + "epoch": 0.67, + "grad_norm": 0.7542945465405825, + "learning_rate": 2.643301360577102e-06, + "loss": 0.4435, + "step": 9393 + }, + { + "epoch": 0.67, + "grad_norm": 1.8020095306104078, + "learning_rate": 2.642287913031608e-06, + "loss": 0.4854, + "step": 9394 + }, + { + "epoch": 0.67, + "grad_norm": 1.667704427705805, + "learning_rate": 2.641274590028452e-06, + "loss": 0.4727, + "step": 9395 + }, + { + "epoch": 0.67, + "grad_norm": 1.5017919037242102, + "learning_rate": 2.6402613916211684e-06, + "loss": 0.5165, + "step": 9396 + }, + { + "epoch": 0.67, + "grad_norm": 1.619905483132893, + "learning_rate": 2.639248317863271e-06, + "loss": 0.6024, + "step": 9397 + }, + { + "epoch": 0.67, + "grad_norm": 2.0077043337989315, + "learning_rate": 2.6382353688082778e-06, + "loss": 0.4749, + "step": 9398 + }, + { + "epoch": 0.67, + "grad_norm": 0.6955360646684536, + "learning_rate": 2.6372225445096944e-06, + "loss": 0.4215, + "step": 9399 + }, + { + "epoch": 0.67, + "grad_norm": 1.9761185471217073, + "learning_rate": 2.6362098450210232e-06, + "loss": 0.5906, + "step": 9400 + }, + { + "epoch": 0.67, + "grad_norm": 1.8402587661000904, + "learning_rate": 2.635197270395759e-06, + "loss": 0.5531, + "step": 9401 + }, + { + "epoch": 0.67, + "grad_norm": 3.133315185894663, + "learning_rate": 2.6341848206873843e-06, + "loss": 0.5569, + "step": 9402 + }, + { + "epoch": 0.67, + "grad_norm": 2.322999908109515, + "learning_rate": 2.6331724959493875e-06, + "loss": 0.5171, + "step": 9403 + }, + { + "epoch": 0.67, + "grad_norm": 2.3725694964344233, + "learning_rate": 2.632160296235238e-06, + "loss": 0.5691, + "step": 9404 + }, + { + "epoch": 0.67, + "grad_norm": 1.720015607072126, + "learning_rate": 2.6311482215984046e-06, + "loss": 0.5959, + "step": 9405 + }, + { + "epoch": 0.67, + "grad_norm": 1.7551855073394211, + "learning_rate": 2.630136272092349e-06, + "loss": 0.5191, + "step": 9406 + }, + { + "epoch": 0.67, + "grad_norm": 1.9205755823336972, + "learning_rate": 2.6291244477705258e-06, + "loss": 0.5108, + "step": 9407 + }, + { + "epoch": 0.67, + "grad_norm": 1.6106247792856978, + "learning_rate": 2.6281127486863846e-06, + "loss": 0.5056, + "step": 9408 + }, + { + "epoch": 0.67, + "grad_norm": 1.6019931154317864, + "learning_rate": 2.6271011748933627e-06, + "loss": 0.5184, + "step": 9409 + }, + { + "epoch": 0.67, + "grad_norm": 2.52329924799111, + "learning_rate": 2.626089726444898e-06, + "loss": 0.5931, + "step": 9410 + }, + { + "epoch": 0.67, + "grad_norm": 1.8922648967243674, + "learning_rate": 2.6250784033944177e-06, + "loss": 0.5644, + "step": 9411 + }, + { + "epoch": 0.67, + "grad_norm": 2.766223840924474, + "learning_rate": 2.6240672057953452e-06, + "loss": 0.5199, + "step": 9412 + }, + { + "epoch": 0.67, + "grad_norm": 1.6536856463364034, + "learning_rate": 2.6230561337010916e-06, + "loss": 0.4583, + "step": 9413 + }, + { + "epoch": 0.67, + "grad_norm": 2.581675097498057, + "learning_rate": 2.6220451871650674e-06, + "loss": 0.5486, + "step": 9414 + }, + { + "epoch": 0.67, + "grad_norm": 2.7255320494273776, + "learning_rate": 2.6210343662406746e-06, + "loss": 0.4982, + "step": 9415 + }, + { + "epoch": 0.67, + "grad_norm": 1.5450549838378127, + "learning_rate": 2.6200236709813063e-06, + "loss": 0.47, + "step": 9416 + }, + { + "epoch": 0.67, + "grad_norm": 1.5719209700293362, + "learning_rate": 2.6190131014403553e-06, + "loss": 0.4797, + "step": 9417 + }, + { + "epoch": 0.67, + "grad_norm": 1.8393266579014138, + "learning_rate": 2.618002657671196e-06, + "loss": 0.5697, + "step": 9418 + }, + { + "epoch": 0.67, + "grad_norm": 4.137594834059182, + "learning_rate": 2.616992339727211e-06, + "loss": 0.547, + "step": 9419 + }, + { + "epoch": 0.67, + "grad_norm": 2.280693140997432, + "learning_rate": 2.6159821476617637e-06, + "loss": 0.4904, + "step": 9420 + }, + { + "epoch": 0.67, + "grad_norm": 1.8123526476827017, + "learning_rate": 2.6149720815282176e-06, + "loss": 0.595, + "step": 9421 + }, + { + "epoch": 0.67, + "grad_norm": 1.8847424908189572, + "learning_rate": 2.613962141379928e-06, + "loss": 0.5233, + "step": 9422 + }, + { + "epoch": 0.67, + "grad_norm": 0.7367662600057733, + "learning_rate": 2.6129523272702422e-06, + "loss": 0.4309, + "step": 9423 + }, + { + "epoch": 0.67, + "grad_norm": 1.680444664528618, + "learning_rate": 2.6119426392525053e-06, + "loss": 0.5823, + "step": 9424 + }, + { + "epoch": 0.67, + "grad_norm": 1.6029475928022763, + "learning_rate": 2.6109330773800466e-06, + "loss": 0.5891, + "step": 9425 + }, + { + "epoch": 0.67, + "grad_norm": 2.081789554583414, + "learning_rate": 2.6099236417062013e-06, + "loss": 0.5082, + "step": 9426 + }, + { + "epoch": 0.67, + "grad_norm": 4.907833550623655, + "learning_rate": 2.6089143322842863e-06, + "loss": 0.579, + "step": 9427 + }, + { + "epoch": 0.67, + "grad_norm": 1.6286104842076286, + "learning_rate": 2.607905149167619e-06, + "loss": 0.5293, + "step": 9428 + }, + { + "epoch": 0.67, + "grad_norm": 1.7738339715338716, + "learning_rate": 2.606896092409506e-06, + "loss": 0.5268, + "step": 9429 + }, + { + "epoch": 0.67, + "grad_norm": 1.9732824688826989, + "learning_rate": 2.605887162063251e-06, + "loss": 0.5628, + "step": 9430 + }, + { + "epoch": 0.67, + "grad_norm": 1.8938000553833283, + "learning_rate": 2.60487835818215e-06, + "loss": 0.4734, + "step": 9431 + }, + { + "epoch": 0.67, + "grad_norm": 1.7642685339128816, + "learning_rate": 2.6038696808194886e-06, + "loss": 0.5328, + "step": 9432 + }, + { + "epoch": 0.67, + "grad_norm": 2.020182967608077, + "learning_rate": 2.6028611300285496e-06, + "loss": 0.5759, + "step": 9433 + }, + { + "epoch": 0.67, + "grad_norm": 3.5154605846524563, + "learning_rate": 2.601852705862608e-06, + "loss": 0.5749, + "step": 9434 + }, + { + "epoch": 0.67, + "grad_norm": 1.998371151999566, + "learning_rate": 2.600844408374933e-06, + "loss": 0.5418, + "step": 9435 + }, + { + "epoch": 0.67, + "grad_norm": 1.8043891956131017, + "learning_rate": 2.5998362376187852e-06, + "loss": 0.4581, + "step": 9436 + }, + { + "epoch": 0.67, + "grad_norm": 1.845530234977172, + "learning_rate": 2.598828193647421e-06, + "loss": 0.5612, + "step": 9437 + }, + { + "epoch": 0.67, + "grad_norm": 1.6788507557295131, + "learning_rate": 2.597820276514089e-06, + "loss": 0.4858, + "step": 9438 + }, + { + "epoch": 0.67, + "grad_norm": 1.9792954891265269, + "learning_rate": 2.596812486272028e-06, + "loss": 0.53, + "step": 9439 + }, + { + "epoch": 0.67, + "grad_norm": 1.7373057541308596, + "learning_rate": 2.5958048229744747e-06, + "loss": 0.5404, + "step": 9440 + }, + { + "epoch": 0.67, + "grad_norm": 2.339298134494633, + "learning_rate": 2.5947972866746573e-06, + "loss": 0.5593, + "step": 9441 + }, + { + "epoch": 0.67, + "grad_norm": 1.6172184720769516, + "learning_rate": 2.5937898774257993e-06, + "loss": 0.5508, + "step": 9442 + }, + { + "epoch": 0.67, + "grad_norm": 1.6498094604726037, + "learning_rate": 2.592782595281111e-06, + "loss": 0.5134, + "step": 9443 + }, + { + "epoch": 0.67, + "grad_norm": 1.7213098754311706, + "learning_rate": 2.591775440293801e-06, + "loss": 0.5075, + "step": 9444 + }, + { + "epoch": 0.67, + "grad_norm": 1.6207791465240433, + "learning_rate": 2.5907684125170764e-06, + "loss": 0.5044, + "step": 9445 + }, + { + "epoch": 0.67, + "grad_norm": 0.6736754203347862, + "learning_rate": 2.5897615120041254e-06, + "loss": 0.4163, + "step": 9446 + }, + { + "epoch": 0.67, + "grad_norm": 1.624793886944733, + "learning_rate": 2.58875473880814e-06, + "loss": 0.5421, + "step": 9447 + }, + { + "epoch": 0.67, + "grad_norm": 0.7945752530063672, + "learning_rate": 2.5877480929822975e-06, + "loss": 0.4583, + "step": 9448 + }, + { + "epoch": 0.67, + "grad_norm": 1.9211719659216349, + "learning_rate": 2.586741574579777e-06, + "loss": 0.4714, + "step": 9449 + }, + { + "epoch": 0.67, + "grad_norm": 1.7740340453493817, + "learning_rate": 2.585735183653742e-06, + "loss": 0.5802, + "step": 9450 + }, + { + "epoch": 0.67, + "grad_norm": 1.6438096084255551, + "learning_rate": 2.5847289202573556e-06, + "loss": 0.4885, + "step": 9451 + }, + { + "epoch": 0.67, + "grad_norm": 2.3279498691081817, + "learning_rate": 2.583722784443772e-06, + "loss": 0.568, + "step": 9452 + }, + { + "epoch": 0.67, + "grad_norm": 0.6967730817955221, + "learning_rate": 2.5827167762661375e-06, + "loss": 0.4427, + "step": 9453 + }, + { + "epoch": 0.67, + "grad_norm": 1.6935689450566607, + "learning_rate": 2.581710895777596e-06, + "loss": 0.5223, + "step": 9454 + }, + { + "epoch": 0.67, + "grad_norm": 1.7229871431083046, + "learning_rate": 2.5807051430312757e-06, + "loss": 0.5635, + "step": 9455 + }, + { + "epoch": 0.67, + "grad_norm": 0.7324760060440557, + "learning_rate": 2.579699518080311e-06, + "loss": 0.427, + "step": 9456 + }, + { + "epoch": 0.67, + "grad_norm": 0.7059640199939262, + "learning_rate": 2.578694020977817e-06, + "loss": 0.445, + "step": 9457 + }, + { + "epoch": 0.67, + "grad_norm": 2.4020426232563015, + "learning_rate": 2.5776886517769096e-06, + "loss": 0.6046, + "step": 9458 + }, + { + "epoch": 0.67, + "grad_norm": 1.9229153322093986, + "learning_rate": 2.576683410530695e-06, + "loss": 0.4881, + "step": 9459 + }, + { + "epoch": 0.67, + "grad_norm": 1.8051396010963872, + "learning_rate": 2.5756782972922743e-06, + "loss": 0.5431, + "step": 9460 + }, + { + "epoch": 0.67, + "grad_norm": 1.7327483046434844, + "learning_rate": 2.5746733121147427e-06, + "loss": 0.5619, + "step": 9461 + }, + { + "epoch": 0.67, + "grad_norm": 1.52122955396837, + "learning_rate": 2.5736684550511824e-06, + "loss": 0.4914, + "step": 9462 + }, + { + "epoch": 0.67, + "grad_norm": 1.4510721707049927, + "learning_rate": 2.572663726154676e-06, + "loss": 0.4537, + "step": 9463 + }, + { + "epoch": 0.67, + "grad_norm": 1.483829631160677, + "learning_rate": 2.5716591254782976e-06, + "loss": 0.53, + "step": 9464 + }, + { + "epoch": 0.67, + "grad_norm": 1.4531050308737308, + "learning_rate": 2.570654653075111e-06, + "loss": 0.5145, + "step": 9465 + }, + { + "epoch": 0.67, + "grad_norm": 1.6527268485830084, + "learning_rate": 2.56965030899818e-06, + "loss": 0.4879, + "step": 9466 + }, + { + "epoch": 0.67, + "grad_norm": 1.6253567652311904, + "learning_rate": 2.5686460933005507e-06, + "loss": 0.5276, + "step": 9467 + }, + { + "epoch": 0.67, + "grad_norm": 0.7727776089497317, + "learning_rate": 2.567642006035278e-06, + "loss": 0.4687, + "step": 9468 + }, + { + "epoch": 0.67, + "grad_norm": 1.7084362372291313, + "learning_rate": 2.5666380472553937e-06, + "loss": 0.603, + "step": 9469 + }, + { + "epoch": 0.67, + "grad_norm": 1.8033910548066474, + "learning_rate": 2.5656342170139337e-06, + "loss": 0.5501, + "step": 9470 + }, + { + "epoch": 0.67, + "grad_norm": 2.088318484360144, + "learning_rate": 2.564630515363923e-06, + "loss": 0.553, + "step": 9471 + }, + { + "epoch": 0.67, + "grad_norm": 1.4394327090730423, + "learning_rate": 2.5636269423583797e-06, + "loss": 0.498, + "step": 9472 + }, + { + "epoch": 0.67, + "grad_norm": 1.8393807634961048, + "learning_rate": 2.5626234980503194e-06, + "loss": 0.545, + "step": 9473 + }, + { + "epoch": 0.67, + "grad_norm": 2.0113717395339137, + "learning_rate": 2.561620182492741e-06, + "loss": 0.5932, + "step": 9474 + }, + { + "epoch": 0.67, + "grad_norm": 1.9480433883782802, + "learning_rate": 2.5606169957386517e-06, + "loss": 0.5094, + "step": 9475 + }, + { + "epoch": 0.67, + "grad_norm": 1.7760046490703643, + "learning_rate": 2.5596139378410356e-06, + "loss": 0.5514, + "step": 9476 + }, + { + "epoch": 0.67, + "grad_norm": 1.6384559507421317, + "learning_rate": 2.5586110088528826e-06, + "loss": 0.5594, + "step": 9477 + }, + { + "epoch": 0.67, + "grad_norm": 1.8881217252930413, + "learning_rate": 2.557608208827165e-06, + "loss": 0.5698, + "step": 9478 + }, + { + "epoch": 0.67, + "grad_norm": 3.0398247252978576, + "learning_rate": 2.5566055378168607e-06, + "loss": 0.5178, + "step": 9479 + }, + { + "epoch": 0.67, + "grad_norm": 1.7891940371001271, + "learning_rate": 2.55560299587493e-06, + "loss": 0.5425, + "step": 9480 + }, + { + "epoch": 0.67, + "grad_norm": 2.0900382668957946, + "learning_rate": 2.554600583054332e-06, + "loss": 0.5507, + "step": 9481 + }, + { + "epoch": 0.67, + "grad_norm": 1.9520235527169394, + "learning_rate": 2.5535982994080173e-06, + "loss": 0.4997, + "step": 9482 + }, + { + "epoch": 0.67, + "grad_norm": 1.550274041685687, + "learning_rate": 2.552596144988929e-06, + "loss": 0.5131, + "step": 9483 + }, + { + "epoch": 0.67, + "grad_norm": 1.6737042034419627, + "learning_rate": 2.5515941198500084e-06, + "loss": 0.4993, + "step": 9484 + }, + { + "epoch": 0.67, + "grad_norm": 1.680866337687511, + "learning_rate": 2.5505922240441782e-06, + "loss": 0.5265, + "step": 9485 + }, + { + "epoch": 0.67, + "grad_norm": 1.5727894325858913, + "learning_rate": 2.5495904576243714e-06, + "loss": 0.5632, + "step": 9486 + }, + { + "epoch": 0.67, + "grad_norm": 1.6767497017004442, + "learning_rate": 2.548588820643496e-06, + "loss": 0.5377, + "step": 9487 + }, + { + "epoch": 0.67, + "grad_norm": 1.6200520076198297, + "learning_rate": 2.547587313154467e-06, + "loss": 0.5873, + "step": 9488 + }, + { + "epoch": 0.67, + "grad_norm": 1.8266796533073957, + "learning_rate": 2.5465859352101855e-06, + "loss": 0.5338, + "step": 9489 + }, + { + "epoch": 0.67, + "grad_norm": 1.8158208529541568, + "learning_rate": 2.545584686863548e-06, + "loss": 0.5787, + "step": 9490 + }, + { + "epoch": 0.67, + "grad_norm": 1.6145414603972545, + "learning_rate": 2.544583568167447e-06, + "loss": 0.4998, + "step": 9491 + }, + { + "epoch": 0.67, + "grad_norm": 1.687926448148595, + "learning_rate": 2.543582579174759e-06, + "loss": 0.47, + "step": 9492 + }, + { + "epoch": 0.67, + "grad_norm": 1.6457384371363428, + "learning_rate": 2.542581719938363e-06, + "loss": 0.5549, + "step": 9493 + }, + { + "epoch": 0.67, + "grad_norm": 0.7023248187836227, + "learning_rate": 2.541580990511128e-06, + "loss": 0.4519, + "step": 9494 + }, + { + "epoch": 0.67, + "grad_norm": 1.610444745996479, + "learning_rate": 2.5405803909459144e-06, + "loss": 0.5982, + "step": 9495 + }, + { + "epoch": 0.67, + "grad_norm": 0.674970176760348, + "learning_rate": 2.5395799212955807e-06, + "loss": 0.4403, + "step": 9496 + }, + { + "epoch": 0.67, + "grad_norm": 0.7358487555138075, + "learning_rate": 2.538579581612968e-06, + "loss": 0.401, + "step": 9497 + }, + { + "epoch": 0.67, + "grad_norm": 1.9786864200332772, + "learning_rate": 2.537579371950927e-06, + "loss": 0.4738, + "step": 9498 + }, + { + "epoch": 0.67, + "grad_norm": 1.8772472975570638, + "learning_rate": 2.536579292362285e-06, + "loss": 0.5423, + "step": 9499 + }, + { + "epoch": 0.67, + "grad_norm": 1.6514954900327883, + "learning_rate": 2.535579342899872e-06, + "loss": 0.5458, + "step": 9500 + }, + { + "epoch": 0.67, + "grad_norm": 1.747457513047944, + "learning_rate": 2.534579523616509e-06, + "loss": 0.5434, + "step": 9501 + }, + { + "epoch": 0.67, + "grad_norm": 1.9813805648455791, + "learning_rate": 2.533579834565009e-06, + "loss": 0.5011, + "step": 9502 + }, + { + "epoch": 0.67, + "grad_norm": 1.783658793666206, + "learning_rate": 2.5325802757981823e-06, + "loss": 0.5515, + "step": 9503 + }, + { + "epoch": 0.67, + "grad_norm": 1.8777267092642822, + "learning_rate": 2.531580847368822e-06, + "loss": 0.5845, + "step": 9504 + }, + { + "epoch": 0.67, + "grad_norm": 2.017657819652052, + "learning_rate": 2.5305815493297286e-06, + "loss": 0.5056, + "step": 9505 + }, + { + "epoch": 0.67, + "grad_norm": 0.7296420022061271, + "learning_rate": 2.529582381733684e-06, + "loss": 0.4256, + "step": 9506 + }, + { + "epoch": 0.67, + "grad_norm": 1.6619169096277253, + "learning_rate": 2.5285833446334705e-06, + "loss": 0.5277, + "step": 9507 + }, + { + "epoch": 0.67, + "grad_norm": 1.6814349946899256, + "learning_rate": 2.527584438081856e-06, + "loss": 0.4953, + "step": 9508 + }, + { + "epoch": 0.67, + "grad_norm": 1.592925707335148, + "learning_rate": 2.5265856621316102e-06, + "loss": 0.5021, + "step": 9509 + }, + { + "epoch": 0.67, + "grad_norm": 0.753231769233868, + "learning_rate": 2.5255870168354933e-06, + "loss": 0.4419, + "step": 9510 + }, + { + "epoch": 0.67, + "grad_norm": 2.053864038502355, + "learning_rate": 2.5245885022462523e-06, + "loss": 0.5488, + "step": 9511 + }, + { + "epoch": 0.67, + "grad_norm": 1.659879164129113, + "learning_rate": 2.5235901184166346e-06, + "loss": 0.5053, + "step": 9512 + }, + { + "epoch": 0.68, + "grad_norm": 1.9932880817028733, + "learning_rate": 2.522591865399378e-06, + "loss": 0.5655, + "step": 9513 + }, + { + "epoch": 0.68, + "grad_norm": 2.023091592823052, + "learning_rate": 2.521593743247216e-06, + "loss": 0.5084, + "step": 9514 + }, + { + "epoch": 0.68, + "grad_norm": 1.7180139281185067, + "learning_rate": 2.520595752012866e-06, + "loss": 0.554, + "step": 9515 + }, + { + "epoch": 0.68, + "grad_norm": 0.7609612794144697, + "learning_rate": 2.5195978917490537e-06, + "loss": 0.4346, + "step": 9516 + }, + { + "epoch": 0.68, + "grad_norm": 1.641154249846229, + "learning_rate": 2.5186001625084843e-06, + "loss": 0.5238, + "step": 9517 + }, + { + "epoch": 0.68, + "grad_norm": 1.375778202185351, + "learning_rate": 2.5176025643438617e-06, + "loss": 0.4477, + "step": 9518 + }, + { + "epoch": 0.68, + "grad_norm": 0.7336396538417131, + "learning_rate": 2.5166050973078837e-06, + "loss": 0.4299, + "step": 9519 + }, + { + "epoch": 0.68, + "grad_norm": 3.287552738310857, + "learning_rate": 2.5156077614532386e-06, + "loss": 0.5369, + "step": 9520 + }, + { + "epoch": 0.68, + "grad_norm": 0.7713769540425621, + "learning_rate": 2.5146105568326124e-06, + "loss": 0.42, + "step": 9521 + }, + { + "epoch": 0.68, + "grad_norm": 1.7049407892302118, + "learning_rate": 2.5136134834986757e-06, + "loss": 0.517, + "step": 9522 + }, + { + "epoch": 0.68, + "grad_norm": 1.9442216339101994, + "learning_rate": 2.5126165415041003e-06, + "loss": 0.4659, + "step": 9523 + }, + { + "epoch": 0.68, + "grad_norm": 3.2863669705743144, + "learning_rate": 2.5116197309015476e-06, + "loss": 0.5223, + "step": 9524 + }, + { + "epoch": 0.68, + "grad_norm": 1.9373991814528804, + "learning_rate": 2.5106230517436725e-06, + "loss": 0.5539, + "step": 9525 + }, + { + "epoch": 0.68, + "grad_norm": 1.9943180010617376, + "learning_rate": 2.5096265040831245e-06, + "loss": 0.488, + "step": 9526 + }, + { + "epoch": 0.68, + "grad_norm": 1.7709808569931884, + "learning_rate": 2.50863008797254e-06, + "loss": 0.5166, + "step": 9527 + }, + { + "epoch": 0.68, + "grad_norm": 1.928882090532752, + "learning_rate": 2.5076338034645597e-06, + "loss": 0.5381, + "step": 9528 + }, + { + "epoch": 0.68, + "grad_norm": 1.6230732440384907, + "learning_rate": 2.506637650611805e-06, + "loss": 0.5475, + "step": 9529 + }, + { + "epoch": 0.68, + "grad_norm": 1.6709327335313024, + "learning_rate": 2.5056416294668984e-06, + "loss": 0.5254, + "step": 9530 + }, + { + "epoch": 0.68, + "grad_norm": 1.6257774698593597, + "learning_rate": 2.504645740082453e-06, + "loss": 0.5147, + "step": 9531 + }, + { + "epoch": 0.68, + "grad_norm": 0.8388249876566426, + "learning_rate": 2.503649982511075e-06, + "loss": 0.4499, + "step": 9532 + }, + { + "epoch": 0.68, + "grad_norm": 1.80968184389095, + "learning_rate": 2.502654356805365e-06, + "loss": 0.4526, + "step": 9533 + }, + { + "epoch": 0.68, + "grad_norm": 1.7729032330510688, + "learning_rate": 2.501658863017911e-06, + "loss": 0.5156, + "step": 9534 + }, + { + "epoch": 0.68, + "grad_norm": 0.7352275364190343, + "learning_rate": 2.5006635012013047e-06, + "loss": 0.4162, + "step": 9535 + }, + { + "epoch": 0.68, + "grad_norm": 1.9480841117748162, + "learning_rate": 2.49966827140812e-06, + "loss": 0.5479, + "step": 9536 + }, + { + "epoch": 0.68, + "grad_norm": 1.8445906108264996, + "learning_rate": 2.4986731736909293e-06, + "loss": 0.5168, + "step": 9537 + }, + { + "epoch": 0.68, + "grad_norm": 1.9230596095204575, + "learning_rate": 2.4976782081022966e-06, + "loss": 0.5416, + "step": 9538 + }, + { + "epoch": 0.68, + "grad_norm": 1.4914599300101168, + "learning_rate": 2.4966833746947807e-06, + "loss": 0.5196, + "step": 9539 + }, + { + "epoch": 0.68, + "grad_norm": 1.7613411500829173, + "learning_rate": 2.495688673520933e-06, + "loss": 0.5552, + "step": 9540 + }, + { + "epoch": 0.68, + "grad_norm": 1.8615093903965423, + "learning_rate": 2.494694104633293e-06, + "loss": 0.5429, + "step": 9541 + }, + { + "epoch": 0.68, + "grad_norm": 1.8241569533587358, + "learning_rate": 2.4936996680844e-06, + "loss": 0.5459, + "step": 9542 + }, + { + "epoch": 0.68, + "grad_norm": 1.558975224550322, + "learning_rate": 2.4927053639267827e-06, + "loss": 0.516, + "step": 9543 + }, + { + "epoch": 0.68, + "grad_norm": 2.4980271780878778, + "learning_rate": 2.4917111922129663e-06, + "loss": 0.4988, + "step": 9544 + }, + { + "epoch": 0.68, + "grad_norm": 1.6932431685756684, + "learning_rate": 2.4907171529954616e-06, + "loss": 0.4999, + "step": 9545 + }, + { + "epoch": 0.68, + "grad_norm": 1.5745391868989644, + "learning_rate": 2.4897232463267797e-06, + "loss": 0.4743, + "step": 9546 + }, + { + "epoch": 0.68, + "grad_norm": 1.9878366173094526, + "learning_rate": 2.488729472259422e-06, + "loss": 0.5301, + "step": 9547 + }, + { + "epoch": 0.68, + "grad_norm": 1.8631650287995503, + "learning_rate": 2.4877358308458828e-06, + "loss": 0.5347, + "step": 9548 + }, + { + "epoch": 0.68, + "grad_norm": 1.852351578240347, + "learning_rate": 2.48674232213865e-06, + "loss": 0.5053, + "step": 9549 + }, + { + "epoch": 0.68, + "grad_norm": 1.6330634572438074, + "learning_rate": 2.4857489461902034e-06, + "loss": 0.5432, + "step": 9550 + }, + { + "epoch": 0.68, + "grad_norm": 1.7360097987857852, + "learning_rate": 2.484755703053019e-06, + "loss": 0.436, + "step": 9551 + }, + { + "epoch": 0.68, + "grad_norm": 1.6325831783489244, + "learning_rate": 2.4837625927795592e-06, + "loss": 0.503, + "step": 9552 + }, + { + "epoch": 0.68, + "grad_norm": 1.9333221507410914, + "learning_rate": 2.482769615422286e-06, + "loss": 0.5557, + "step": 9553 + }, + { + "epoch": 0.68, + "grad_norm": 1.9565121364111384, + "learning_rate": 2.4817767710336503e-06, + "loss": 0.5189, + "step": 9554 + }, + { + "epoch": 0.68, + "grad_norm": 2.6553194290600133, + "learning_rate": 2.4807840596660986e-06, + "loss": 0.5087, + "step": 9555 + }, + { + "epoch": 0.68, + "grad_norm": 1.952742800569021, + "learning_rate": 2.479791481372072e-06, + "loss": 0.5122, + "step": 9556 + }, + { + "epoch": 0.68, + "grad_norm": 1.8264825487052163, + "learning_rate": 2.478799036203994e-06, + "loss": 0.5805, + "step": 9557 + }, + { + "epoch": 0.68, + "grad_norm": 1.6943459428099492, + "learning_rate": 2.4778067242142985e-06, + "loss": 0.5011, + "step": 9558 + }, + { + "epoch": 0.68, + "grad_norm": 1.853499828518338, + "learning_rate": 2.476814545455396e-06, + "loss": 0.5415, + "step": 9559 + }, + { + "epoch": 0.68, + "grad_norm": 1.551611367214949, + "learning_rate": 2.4758224999796987e-06, + "loss": 0.4644, + "step": 9560 + }, + { + "epoch": 0.68, + "grad_norm": 1.852126994506452, + "learning_rate": 2.4748305878396105e-06, + "loss": 0.5151, + "step": 9561 + }, + { + "epoch": 0.68, + "grad_norm": 0.8082480467119703, + "learning_rate": 2.473838809087527e-06, + "loss": 0.4092, + "step": 9562 + }, + { + "epoch": 0.68, + "grad_norm": 1.6972044468724394, + "learning_rate": 2.472847163775839e-06, + "loss": 0.5004, + "step": 9563 + }, + { + "epoch": 0.68, + "grad_norm": 1.8414117624802269, + "learning_rate": 2.4718556519569238e-06, + "loss": 0.5388, + "step": 9564 + }, + { + "epoch": 0.68, + "grad_norm": 1.7411187096555942, + "learning_rate": 2.4708642736831624e-06, + "loss": 0.4674, + "step": 9565 + }, + { + "epoch": 0.68, + "grad_norm": 1.6947042490477509, + "learning_rate": 2.469873029006919e-06, + "loss": 0.5267, + "step": 9566 + }, + { + "epoch": 0.68, + "grad_norm": 1.8947207283291747, + "learning_rate": 2.4688819179805547e-06, + "loss": 0.496, + "step": 9567 + }, + { + "epoch": 0.68, + "grad_norm": 1.680949200879843, + "learning_rate": 2.4678909406564246e-06, + "loss": 0.4976, + "step": 9568 + }, + { + "epoch": 0.68, + "grad_norm": 1.766009164129411, + "learning_rate": 2.4669000970868745e-06, + "loss": 0.5155, + "step": 9569 + }, + { + "epoch": 0.68, + "grad_norm": 10.445280831561234, + "learning_rate": 2.4659093873242466e-06, + "loss": 0.5396, + "step": 9570 + }, + { + "epoch": 0.68, + "grad_norm": 3.2145917133301234, + "learning_rate": 2.46491881142087e-06, + "loss": 0.5587, + "step": 9571 + }, + { + "epoch": 0.68, + "grad_norm": 1.6820701632272255, + "learning_rate": 2.4639283694290714e-06, + "loss": 0.5074, + "step": 9572 + }, + { + "epoch": 0.68, + "grad_norm": 1.8844335329400594, + "learning_rate": 2.4629380614011705e-06, + "loss": 0.5811, + "step": 9573 + }, + { + "epoch": 0.68, + "grad_norm": 1.9398746387485344, + "learning_rate": 2.461947887389479e-06, + "loss": 0.5666, + "step": 9574 + }, + { + "epoch": 0.68, + "grad_norm": 1.9609301187943184, + "learning_rate": 2.4609578474462986e-06, + "loss": 0.5499, + "step": 9575 + }, + { + "epoch": 0.68, + "grad_norm": 1.7183875863020177, + "learning_rate": 2.4599679416239264e-06, + "loss": 0.5626, + "step": 9576 + }, + { + "epoch": 0.68, + "grad_norm": 2.9360086989093914, + "learning_rate": 2.458978169974658e-06, + "loss": 0.4993, + "step": 9577 + }, + { + "epoch": 0.68, + "grad_norm": 1.711903623150239, + "learning_rate": 2.457988532550771e-06, + "loss": 0.4473, + "step": 9578 + }, + { + "epoch": 0.68, + "grad_norm": 1.7350133731129185, + "learning_rate": 2.4569990294045426e-06, + "loss": 0.5146, + "step": 9579 + }, + { + "epoch": 0.68, + "grad_norm": 1.8130451758861044, + "learning_rate": 2.456009660588243e-06, + "loss": 0.5549, + "step": 9580 + }, + { + "epoch": 0.68, + "grad_norm": 1.8784648522692742, + "learning_rate": 2.455020426154135e-06, + "loss": 0.5001, + "step": 9581 + }, + { + "epoch": 0.68, + "grad_norm": 1.7029316562199608, + "learning_rate": 2.4540313261544696e-06, + "loss": 0.4918, + "step": 9582 + }, + { + "epoch": 0.68, + "grad_norm": 2.2115179537516596, + "learning_rate": 2.4530423606414966e-06, + "loss": 0.5586, + "step": 9583 + }, + { + "epoch": 0.68, + "grad_norm": 2.298389140851446, + "learning_rate": 2.4520535296674565e-06, + "loss": 0.5391, + "step": 9584 + }, + { + "epoch": 0.68, + "grad_norm": 0.6715579160679879, + "learning_rate": 2.4510648332845827e-06, + "loss": 0.3968, + "step": 9585 + }, + { + "epoch": 0.68, + "grad_norm": 1.6706786945390941, + "learning_rate": 2.450076271545103e-06, + "loss": 0.5066, + "step": 9586 + }, + { + "epoch": 0.68, + "grad_norm": 1.631643744810339, + "learning_rate": 2.4490878445012313e-06, + "loss": 0.4488, + "step": 9587 + }, + { + "epoch": 0.68, + "grad_norm": 1.8576095885645028, + "learning_rate": 2.448099552205187e-06, + "loss": 0.5679, + "step": 9588 + }, + { + "epoch": 0.68, + "grad_norm": 2.0684579814895114, + "learning_rate": 2.4471113947091697e-06, + "loss": 0.5251, + "step": 9589 + }, + { + "epoch": 0.68, + "grad_norm": 1.9953171470408428, + "learning_rate": 2.446123372065379e-06, + "loss": 0.5786, + "step": 9590 + }, + { + "epoch": 0.68, + "grad_norm": 0.6636786446021233, + "learning_rate": 2.445135484326005e-06, + "loss": 0.4484, + "step": 9591 + }, + { + "epoch": 0.68, + "grad_norm": 1.7907418374842137, + "learning_rate": 2.4441477315432324e-06, + "loss": 0.4782, + "step": 9592 + }, + { + "epoch": 0.68, + "grad_norm": 1.9537334149203662, + "learning_rate": 2.4431601137692383e-06, + "loss": 0.5533, + "step": 9593 + }, + { + "epoch": 0.68, + "grad_norm": 1.7966067205624559, + "learning_rate": 2.442172631056189e-06, + "loss": 0.5041, + "step": 9594 + }, + { + "epoch": 0.68, + "grad_norm": 1.6090092832165857, + "learning_rate": 2.441185283456249e-06, + "loss": 0.4934, + "step": 9595 + }, + { + "epoch": 0.68, + "grad_norm": 2.4183084889583575, + "learning_rate": 2.4401980710215716e-06, + "loss": 0.5696, + "step": 9596 + }, + { + "epoch": 0.68, + "grad_norm": 1.8852750049293814, + "learning_rate": 2.4392109938043067e-06, + "loss": 0.5186, + "step": 9597 + }, + { + "epoch": 0.68, + "grad_norm": 0.7135088439101847, + "learning_rate": 2.438224051856594e-06, + "loss": 0.4103, + "step": 9598 + }, + { + "epoch": 0.68, + "grad_norm": 1.683944205881511, + "learning_rate": 2.4372372452305683e-06, + "loss": 0.4589, + "step": 9599 + }, + { + "epoch": 0.68, + "grad_norm": 1.8582910428671553, + "learning_rate": 2.436250573978356e-06, + "loss": 0.6219, + "step": 9600 + }, + { + "epoch": 0.68, + "grad_norm": 2.1222296661880096, + "learning_rate": 2.4352640381520737e-06, + "loss": 0.5995, + "step": 9601 + }, + { + "epoch": 0.68, + "grad_norm": 2.3580470030994203, + "learning_rate": 2.4342776378038358e-06, + "loss": 0.5823, + "step": 9602 + }, + { + "epoch": 0.68, + "grad_norm": 2.0128325241315514, + "learning_rate": 2.4332913729857466e-06, + "loss": 0.5588, + "step": 9603 + }, + { + "epoch": 0.68, + "grad_norm": 2.3822140829201848, + "learning_rate": 2.4323052437499046e-06, + "loss": 0.4959, + "step": 9604 + }, + { + "epoch": 0.68, + "grad_norm": 1.7112563955961178, + "learning_rate": 2.431319250148402e-06, + "loss": 0.5412, + "step": 9605 + }, + { + "epoch": 0.68, + "grad_norm": 1.664078167254722, + "learning_rate": 2.430333392233316e-06, + "loss": 0.5848, + "step": 9606 + }, + { + "epoch": 0.68, + "grad_norm": 3.668304497860216, + "learning_rate": 2.429347670056732e-06, + "loss": 0.4826, + "step": 9607 + }, + { + "epoch": 0.68, + "grad_norm": 1.7745119659289073, + "learning_rate": 2.428362083670713e-06, + "loss": 0.5834, + "step": 9608 + }, + { + "epoch": 0.68, + "grad_norm": 1.719230827065138, + "learning_rate": 2.4273766331273235e-06, + "loss": 0.5154, + "step": 9609 + }, + { + "epoch": 0.68, + "grad_norm": 8.039030953657164, + "learning_rate": 2.426391318478614e-06, + "loss": 0.5418, + "step": 9610 + }, + { + "epoch": 0.68, + "grad_norm": 1.7871671378822676, + "learning_rate": 2.4254061397766403e-06, + "loss": 0.5371, + "step": 9611 + }, + { + "epoch": 0.68, + "grad_norm": 4.156800602127164, + "learning_rate": 2.4244210970734354e-06, + "loss": 0.488, + "step": 9612 + }, + { + "epoch": 0.68, + "grad_norm": 1.9159866945345476, + "learning_rate": 2.4234361904210357e-06, + "loss": 0.5179, + "step": 9613 + }, + { + "epoch": 0.68, + "grad_norm": 1.9667429349145797, + "learning_rate": 2.422451419871467e-06, + "loss": 0.5536, + "step": 9614 + }, + { + "epoch": 0.68, + "grad_norm": 2.008466148218622, + "learning_rate": 2.4214667854767484e-06, + "loss": 0.4948, + "step": 9615 + }, + { + "epoch": 0.68, + "grad_norm": 2.5618663358603255, + "learning_rate": 2.4204822872888935e-06, + "loss": 0.5105, + "step": 9616 + }, + { + "epoch": 0.68, + "grad_norm": 2.145010496636656, + "learning_rate": 2.4194979253599004e-06, + "loss": 0.5428, + "step": 9617 + }, + { + "epoch": 0.68, + "grad_norm": 1.9775325740704937, + "learning_rate": 2.4185136997417748e-06, + "loss": 0.5038, + "step": 9618 + }, + { + "epoch": 0.68, + "grad_norm": 2.08691380280909, + "learning_rate": 2.4175296104865008e-06, + "loss": 0.5288, + "step": 9619 + }, + { + "epoch": 0.68, + "grad_norm": 1.7767166240308947, + "learning_rate": 2.4165456576460638e-06, + "loss": 0.5089, + "step": 9620 + }, + { + "epoch": 0.68, + "grad_norm": 1.6896755097495912, + "learning_rate": 2.415561841272439e-06, + "loss": 0.5599, + "step": 9621 + }, + { + "epoch": 0.68, + "grad_norm": 3.09077774601849, + "learning_rate": 2.414578161417595e-06, + "loss": 0.5608, + "step": 9622 + }, + { + "epoch": 0.68, + "grad_norm": 2.0115685681436695, + "learning_rate": 2.413594618133495e-06, + "loss": 0.5644, + "step": 9623 + }, + { + "epoch": 0.68, + "grad_norm": 1.6285159494116115, + "learning_rate": 2.412611211472089e-06, + "loss": 0.447, + "step": 9624 + }, + { + "epoch": 0.68, + "grad_norm": 1.498081385007708, + "learning_rate": 2.4116279414853265e-06, + "loss": 0.5396, + "step": 9625 + }, + { + "epoch": 0.68, + "grad_norm": 1.7421109857184551, + "learning_rate": 2.4106448082251466e-06, + "loss": 0.5695, + "step": 9626 + }, + { + "epoch": 0.68, + "grad_norm": 1.8268009152160873, + "learning_rate": 2.4096618117434815e-06, + "loss": 0.5122, + "step": 9627 + }, + { + "epoch": 0.68, + "grad_norm": 1.3805049180469506, + "learning_rate": 2.4086789520922578e-06, + "loss": 0.4929, + "step": 9628 + }, + { + "epoch": 0.68, + "grad_norm": 1.8648818199509813, + "learning_rate": 2.4076962293233913e-06, + "loss": 0.5435, + "step": 9629 + }, + { + "epoch": 0.68, + "grad_norm": 1.6630724784711406, + "learning_rate": 2.4067136434887967e-06, + "loss": 0.5596, + "step": 9630 + }, + { + "epoch": 0.68, + "grad_norm": 1.8679785844554826, + "learning_rate": 2.405731194640372e-06, + "loss": 0.4329, + "step": 9631 + }, + { + "epoch": 0.68, + "grad_norm": 1.6490938436951075, + "learning_rate": 2.4047488828300166e-06, + "loss": 0.4649, + "step": 9632 + }, + { + "epoch": 0.68, + "grad_norm": 1.8972396161508762, + "learning_rate": 2.403766708109619e-06, + "loss": 0.5438, + "step": 9633 + }, + { + "epoch": 0.68, + "grad_norm": 2.0082860902695945, + "learning_rate": 2.4027846705310615e-06, + "loss": 0.539, + "step": 9634 + }, + { + "epoch": 0.68, + "grad_norm": 2.1723936954406247, + "learning_rate": 2.401802770146219e-06, + "loss": 0.6375, + "step": 9635 + }, + { + "epoch": 0.68, + "grad_norm": 1.553225181520766, + "learning_rate": 2.4008210070069554e-06, + "loss": 0.481, + "step": 9636 + }, + { + "epoch": 0.68, + "grad_norm": 1.891443991007578, + "learning_rate": 2.399839381165137e-06, + "loss": 0.5151, + "step": 9637 + }, + { + "epoch": 0.68, + "grad_norm": 1.88999372008184, + "learning_rate": 2.3988578926726112e-06, + "loss": 0.5776, + "step": 9638 + }, + { + "epoch": 0.68, + "grad_norm": 1.7107626454275147, + "learning_rate": 2.397876541581227e-06, + "loss": 0.4857, + "step": 9639 + }, + { + "epoch": 0.68, + "grad_norm": 2.066592409323531, + "learning_rate": 2.3968953279428175e-06, + "loss": 0.5646, + "step": 9640 + }, + { + "epoch": 0.68, + "grad_norm": 1.456923225283102, + "learning_rate": 2.395914251809221e-06, + "loss": 0.5291, + "step": 9641 + }, + { + "epoch": 0.68, + "grad_norm": 1.6434393381699206, + "learning_rate": 2.394933313232256e-06, + "loss": 0.5262, + "step": 9642 + }, + { + "epoch": 0.68, + "grad_norm": 0.7547059262723421, + "learning_rate": 2.393952512263738e-06, + "loss": 0.4354, + "step": 9643 + }, + { + "epoch": 0.68, + "grad_norm": 1.6322853463247602, + "learning_rate": 2.392971848955483e-06, + "loss": 0.5006, + "step": 9644 + }, + { + "epoch": 0.68, + "grad_norm": 1.5049163449136036, + "learning_rate": 2.391991323359287e-06, + "loss": 0.5343, + "step": 9645 + }, + { + "epoch": 0.68, + "grad_norm": 1.6780925953778096, + "learning_rate": 2.3910109355269485e-06, + "loss": 0.5331, + "step": 9646 + }, + { + "epoch": 0.68, + "grad_norm": 1.8391275800045892, + "learning_rate": 2.3900306855102482e-06, + "loss": 0.5945, + "step": 9647 + }, + { + "epoch": 0.68, + "grad_norm": 1.9052184544425523, + "learning_rate": 2.3890505733609752e-06, + "loss": 0.5728, + "step": 9648 + }, + { + "epoch": 0.68, + "grad_norm": 1.891118447636134, + "learning_rate": 2.388070599130896e-06, + "loss": 0.516, + "step": 9649 + }, + { + "epoch": 0.68, + "grad_norm": 1.6619637124295878, + "learning_rate": 2.3870907628717787e-06, + "loss": 0.506, + "step": 9650 + }, + { + "epoch": 0.68, + "grad_norm": 1.714740117786903, + "learning_rate": 2.3861110646353804e-06, + "loss": 0.4947, + "step": 9651 + }, + { + "epoch": 0.68, + "grad_norm": 1.8795911532916356, + "learning_rate": 2.385131504473453e-06, + "loss": 0.5654, + "step": 9652 + }, + { + "epoch": 0.68, + "grad_norm": 1.7687032138593557, + "learning_rate": 2.384152082437742e-06, + "loss": 0.4982, + "step": 9653 + }, + { + "epoch": 0.69, + "grad_norm": 1.7318848519220131, + "learning_rate": 2.3831727985799792e-06, + "loss": 0.5384, + "step": 9654 + }, + { + "epoch": 0.69, + "grad_norm": 1.4707849783710212, + "learning_rate": 2.382193652951897e-06, + "loss": 0.5167, + "step": 9655 + }, + { + "epoch": 0.69, + "grad_norm": 2.1945608387239024, + "learning_rate": 2.3812146456052155e-06, + "loss": 0.5822, + "step": 9656 + }, + { + "epoch": 0.69, + "grad_norm": 1.7122781316756488, + "learning_rate": 2.3802357765916507e-06, + "loss": 0.4975, + "step": 9657 + }, + { + "epoch": 0.69, + "grad_norm": 1.6951044331552443, + "learning_rate": 2.379257045962911e-06, + "loss": 0.523, + "step": 9658 + }, + { + "epoch": 0.69, + "grad_norm": 1.8054299410815489, + "learning_rate": 2.3782784537706905e-06, + "loss": 0.5988, + "step": 9659 + }, + { + "epoch": 0.69, + "grad_norm": 1.696654150313717, + "learning_rate": 2.3773000000666895e-06, + "loss": 0.5485, + "step": 9660 + }, + { + "epoch": 0.69, + "grad_norm": 1.8463808570501872, + "learning_rate": 2.376321684902587e-06, + "loss": 0.549, + "step": 9661 + }, + { + "epoch": 0.69, + "grad_norm": 0.7268450219389964, + "learning_rate": 2.375343508330064e-06, + "loss": 0.4001, + "step": 9662 + }, + { + "epoch": 0.69, + "grad_norm": 1.7267263724348494, + "learning_rate": 2.3743654704007906e-06, + "loss": 0.5181, + "step": 9663 + }, + { + "epoch": 0.69, + "grad_norm": 2.0690023829139963, + "learning_rate": 2.3733875711664296e-06, + "loss": 0.5146, + "step": 9664 + }, + { + "epoch": 0.69, + "grad_norm": 1.6031077975368517, + "learning_rate": 2.372409810678639e-06, + "loss": 0.5357, + "step": 9665 + }, + { + "epoch": 0.69, + "grad_norm": 1.7533683237193083, + "learning_rate": 2.3714321889890623e-06, + "loss": 0.5013, + "step": 9666 + }, + { + "epoch": 0.69, + "grad_norm": 1.5910701971635117, + "learning_rate": 2.370454706149348e-06, + "loss": 0.4991, + "step": 9667 + }, + { + "epoch": 0.69, + "grad_norm": 1.8899799969970354, + "learning_rate": 2.369477362211124e-06, + "loss": 0.621, + "step": 9668 + }, + { + "epoch": 0.69, + "grad_norm": 1.639762493839967, + "learning_rate": 2.368500157226021e-06, + "loss": 0.5253, + "step": 9669 + }, + { + "epoch": 0.69, + "grad_norm": 1.547093097682518, + "learning_rate": 2.3675230912456525e-06, + "loss": 0.4248, + "step": 9670 + }, + { + "epoch": 0.69, + "grad_norm": 1.9338899450303266, + "learning_rate": 2.366546164321637e-06, + "loss": 0.5769, + "step": 9671 + }, + { + "epoch": 0.69, + "grad_norm": 0.766105732873643, + "learning_rate": 2.3655693765055776e-06, + "loss": 0.4406, + "step": 9672 + }, + { + "epoch": 0.69, + "grad_norm": 2.2886378304249533, + "learning_rate": 2.364592727849069e-06, + "loss": 0.4774, + "step": 9673 + }, + { + "epoch": 0.69, + "grad_norm": 1.905989960256344, + "learning_rate": 2.363616218403702e-06, + "loss": 0.5309, + "step": 9674 + }, + { + "epoch": 0.69, + "grad_norm": 1.5939169102625215, + "learning_rate": 2.36263984822106e-06, + "loss": 0.5068, + "step": 9675 + }, + { + "epoch": 0.69, + "grad_norm": 1.8241803203480123, + "learning_rate": 2.3616636173527196e-06, + "loss": 0.5913, + "step": 9676 + }, + { + "epoch": 0.69, + "grad_norm": 0.7546650492108021, + "learning_rate": 2.3606875258502433e-06, + "loss": 0.4318, + "step": 9677 + }, + { + "epoch": 0.69, + "grad_norm": 1.5515550406588925, + "learning_rate": 2.359711573765199e-06, + "loss": 0.4407, + "step": 9678 + }, + { + "epoch": 0.69, + "grad_norm": 1.877280232280693, + "learning_rate": 2.3587357611491337e-06, + "loss": 0.5732, + "step": 9679 + }, + { + "epoch": 0.69, + "grad_norm": 1.5867770438057536, + "learning_rate": 2.357760088053595e-06, + "loss": 0.5453, + "step": 9680 + }, + { + "epoch": 0.69, + "grad_norm": 1.7095753277278445, + "learning_rate": 2.3567845545301216e-06, + "loss": 0.5209, + "step": 9681 + }, + { + "epoch": 0.69, + "grad_norm": 1.7294618572672786, + "learning_rate": 2.3558091606302448e-06, + "loss": 0.4707, + "step": 9682 + }, + { + "epoch": 0.69, + "grad_norm": 1.9447321110997529, + "learning_rate": 2.3548339064054898e-06, + "loss": 0.5479, + "step": 9683 + }, + { + "epoch": 0.69, + "grad_norm": 1.7927290691912816, + "learning_rate": 2.353858791907369e-06, + "loss": 0.5491, + "step": 9684 + }, + { + "epoch": 0.69, + "grad_norm": 1.8901077533736064, + "learning_rate": 2.352883817187393e-06, + "loss": 0.6015, + "step": 9685 + }, + { + "epoch": 0.69, + "grad_norm": 1.8229733548640452, + "learning_rate": 2.351908982297064e-06, + "loss": 0.5444, + "step": 9686 + }, + { + "epoch": 0.69, + "grad_norm": 1.8486539378617293, + "learning_rate": 2.3509342872878745e-06, + "loss": 0.4934, + "step": 9687 + }, + { + "epoch": 0.69, + "grad_norm": 1.599023473474026, + "learning_rate": 2.349959732211315e-06, + "loss": 0.5803, + "step": 9688 + }, + { + "epoch": 0.69, + "grad_norm": 2.002305253530704, + "learning_rate": 2.3489853171188576e-06, + "loss": 0.5482, + "step": 9689 + }, + { + "epoch": 0.69, + "grad_norm": 1.7607307991302097, + "learning_rate": 2.3480110420619827e-06, + "loss": 0.5318, + "step": 9690 + }, + { + "epoch": 0.69, + "grad_norm": 0.6783820516278984, + "learning_rate": 2.347036907092149e-06, + "loss": 0.4311, + "step": 9691 + }, + { + "epoch": 0.69, + "grad_norm": 1.5355355565630149, + "learning_rate": 2.3460629122608146e-06, + "loss": 0.4408, + "step": 9692 + }, + { + "epoch": 0.69, + "grad_norm": 0.6624803588869154, + "learning_rate": 2.3450890576194308e-06, + "loss": 0.413, + "step": 9693 + }, + { + "epoch": 0.69, + "grad_norm": 2.0909935875789767, + "learning_rate": 2.3441153432194387e-06, + "loss": 0.4857, + "step": 9694 + }, + { + "epoch": 0.69, + "grad_norm": 1.6915842598027797, + "learning_rate": 2.3431417691122753e-06, + "loss": 0.4846, + "step": 9695 + }, + { + "epoch": 0.69, + "grad_norm": 2.3084265208131844, + "learning_rate": 2.3421683353493623e-06, + "loss": 0.5561, + "step": 9696 + }, + { + "epoch": 0.69, + "grad_norm": 1.4748566703904935, + "learning_rate": 2.341195041982128e-06, + "loss": 0.5278, + "step": 9697 + }, + { + "epoch": 0.69, + "grad_norm": 1.741744206412284, + "learning_rate": 2.3402218890619793e-06, + "loss": 0.4235, + "step": 9698 + }, + { + "epoch": 0.69, + "grad_norm": 1.8158697207660703, + "learning_rate": 2.3392488766403224e-06, + "loss": 0.587, + "step": 9699 + }, + { + "epoch": 0.69, + "grad_norm": 1.8145529251033394, + "learning_rate": 2.338276004768556e-06, + "loss": 0.5014, + "step": 9700 + }, + { + "epoch": 0.69, + "grad_norm": 1.4662875126056782, + "learning_rate": 2.33730327349807e-06, + "loss": 0.5235, + "step": 9701 + }, + { + "epoch": 0.69, + "grad_norm": 1.5810085902491504, + "learning_rate": 2.3363306828802494e-06, + "loss": 0.5093, + "step": 9702 + }, + { + "epoch": 0.69, + "grad_norm": 2.0887149660566395, + "learning_rate": 2.335358232966466e-06, + "loss": 0.5433, + "step": 9703 + }, + { + "epoch": 0.69, + "grad_norm": 1.6452600578084005, + "learning_rate": 2.3343859238080897e-06, + "loss": 0.5643, + "step": 9704 + }, + { + "epoch": 0.69, + "grad_norm": 1.5950349589435142, + "learning_rate": 2.3334137554564817e-06, + "loss": 0.5125, + "step": 9705 + }, + { + "epoch": 0.69, + "grad_norm": 1.743533157515716, + "learning_rate": 2.332441727962996e-06, + "loss": 0.5411, + "step": 9706 + }, + { + "epoch": 0.69, + "grad_norm": 1.6597038470694456, + "learning_rate": 2.3314698413789738e-06, + "loss": 0.5331, + "step": 9707 + }, + { + "epoch": 0.69, + "grad_norm": 1.7294436241495723, + "learning_rate": 2.3304980957557587e-06, + "loss": 0.544, + "step": 9708 + }, + { + "epoch": 0.69, + "grad_norm": 2.0543890153487485, + "learning_rate": 2.3295264911446812e-06, + "loss": 0.471, + "step": 9709 + }, + { + "epoch": 0.69, + "grad_norm": 1.5164334231859424, + "learning_rate": 2.328555027597062e-06, + "loss": 0.5113, + "step": 9710 + }, + { + "epoch": 0.69, + "grad_norm": 1.7338193581547061, + "learning_rate": 2.3275837051642177e-06, + "loss": 0.532, + "step": 9711 + }, + { + "epoch": 0.69, + "grad_norm": 1.7028900201934263, + "learning_rate": 2.3266125238974573e-06, + "loss": 0.5447, + "step": 9712 + }, + { + "epoch": 0.69, + "grad_norm": 1.8203394447615429, + "learning_rate": 2.3256414838480846e-06, + "loss": 0.4893, + "step": 9713 + }, + { + "epoch": 0.69, + "grad_norm": 1.9374825560531639, + "learning_rate": 2.3246705850673885e-06, + "loss": 0.5841, + "step": 9714 + }, + { + "epoch": 0.69, + "grad_norm": 1.9646893317680507, + "learning_rate": 2.3236998276066575e-06, + "loss": 0.5691, + "step": 9715 + }, + { + "epoch": 0.69, + "grad_norm": 0.6739759458106583, + "learning_rate": 2.32272921151717e-06, + "loss": 0.4167, + "step": 9716 + }, + { + "epoch": 0.69, + "grad_norm": 1.7887761796192139, + "learning_rate": 2.3217587368501976e-06, + "loss": 0.5451, + "step": 9717 + }, + { + "epoch": 0.69, + "grad_norm": 1.8281315117448913, + "learning_rate": 2.3207884036570056e-06, + "loss": 0.5607, + "step": 9718 + }, + { + "epoch": 0.69, + "grad_norm": 1.4891015531184466, + "learning_rate": 2.319818211988845e-06, + "loss": 0.4557, + "step": 9719 + }, + { + "epoch": 0.69, + "grad_norm": 1.7791805765281783, + "learning_rate": 2.318848161896971e-06, + "loss": 0.5609, + "step": 9720 + }, + { + "epoch": 0.69, + "grad_norm": 1.8461719845254208, + "learning_rate": 2.317878253432621e-06, + "loss": 0.4859, + "step": 9721 + }, + { + "epoch": 0.69, + "grad_norm": 1.4456484988475986, + "learning_rate": 2.3169084866470297e-06, + "loss": 0.5115, + "step": 9722 + }, + { + "epoch": 0.69, + "grad_norm": 1.9647630411142032, + "learning_rate": 2.3159388615914234e-06, + "loss": 0.5696, + "step": 9723 + }, + { + "epoch": 0.69, + "grad_norm": 1.70415832038415, + "learning_rate": 2.314969378317022e-06, + "loss": 0.4634, + "step": 9724 + }, + { + "epoch": 0.69, + "grad_norm": 1.5825333841364404, + "learning_rate": 2.3140000368750375e-06, + "loss": 0.5044, + "step": 9725 + }, + { + "epoch": 0.69, + "grad_norm": 1.9036931200954714, + "learning_rate": 2.313030837316669e-06, + "loss": 0.5502, + "step": 9726 + }, + { + "epoch": 0.69, + "grad_norm": 1.720146302408287, + "learning_rate": 2.31206177969312e-06, + "loss": 0.49, + "step": 9727 + }, + { + "epoch": 0.69, + "grad_norm": 1.6401267101372148, + "learning_rate": 2.3110928640555736e-06, + "loss": 0.5504, + "step": 9728 + }, + { + "epoch": 0.69, + "grad_norm": 1.6918370911647205, + "learning_rate": 2.3101240904552138e-06, + "loss": 0.5712, + "step": 9729 + }, + { + "epoch": 0.69, + "grad_norm": 1.818275127413543, + "learning_rate": 2.3091554589432142e-06, + "loss": 0.5987, + "step": 9730 + }, + { + "epoch": 0.69, + "grad_norm": 1.580037050067111, + "learning_rate": 2.3081869695707404e-06, + "loss": 0.5052, + "step": 9731 + }, + { + "epoch": 0.69, + "grad_norm": 1.7342208824027834, + "learning_rate": 2.3072186223889543e-06, + "loss": 0.5272, + "step": 9732 + }, + { + "epoch": 0.69, + "grad_norm": 1.7154978353023667, + "learning_rate": 2.306250417449003e-06, + "loss": 0.4807, + "step": 9733 + }, + { + "epoch": 0.69, + "grad_norm": 1.6551836637353523, + "learning_rate": 2.3052823548020325e-06, + "loss": 0.5358, + "step": 9734 + }, + { + "epoch": 0.69, + "grad_norm": 1.774074635914195, + "learning_rate": 2.304314434499179e-06, + "loss": 0.5279, + "step": 9735 + }, + { + "epoch": 0.69, + "grad_norm": 2.159175676488079, + "learning_rate": 2.3033466565915707e-06, + "loss": 0.4797, + "step": 9736 + }, + { + "epoch": 0.69, + "grad_norm": 1.8312616941870463, + "learning_rate": 2.302379021130332e-06, + "loss": 0.5331, + "step": 9737 + }, + { + "epoch": 0.69, + "grad_norm": 1.9948508015697015, + "learning_rate": 2.3014115281665707e-06, + "loss": 0.6009, + "step": 9738 + }, + { + "epoch": 0.69, + "grad_norm": 1.4409636546117957, + "learning_rate": 2.3004441777513996e-06, + "loss": 0.4791, + "step": 9739 + }, + { + "epoch": 0.69, + "grad_norm": 2.0089803309236247, + "learning_rate": 2.2994769699359125e-06, + "loss": 0.6072, + "step": 9740 + }, + { + "epoch": 0.69, + "grad_norm": 1.8773971502124038, + "learning_rate": 2.2985099047712028e-06, + "loss": 0.5379, + "step": 9741 + }, + { + "epoch": 0.69, + "grad_norm": 1.9934135424013362, + "learning_rate": 2.2975429823083538e-06, + "loss": 0.4856, + "step": 9742 + }, + { + "epoch": 0.69, + "grad_norm": 1.4576319873617547, + "learning_rate": 2.296576202598443e-06, + "loss": 0.4992, + "step": 9743 + }, + { + "epoch": 0.69, + "grad_norm": 1.8986928570965453, + "learning_rate": 2.2956095656925366e-06, + "loss": 0.5171, + "step": 9744 + }, + { + "epoch": 0.69, + "grad_norm": 2.207293399251415, + "learning_rate": 2.294643071641696e-06, + "loss": 0.5541, + "step": 9745 + }, + { + "epoch": 0.69, + "grad_norm": 1.5925524492841927, + "learning_rate": 2.2936767204969762e-06, + "loss": 0.4533, + "step": 9746 + }, + { + "epoch": 0.69, + "grad_norm": 1.5678707339411364, + "learning_rate": 2.2927105123094217e-06, + "loss": 0.5601, + "step": 9747 + }, + { + "epoch": 0.69, + "grad_norm": 1.7588709897517392, + "learning_rate": 2.2917444471300733e-06, + "loss": 0.5221, + "step": 9748 + }, + { + "epoch": 0.69, + "grad_norm": 1.7218656456911963, + "learning_rate": 2.290778525009956e-06, + "loss": 0.5752, + "step": 9749 + }, + { + "epoch": 0.69, + "grad_norm": 1.6990540002271146, + "learning_rate": 2.289812746000102e-06, + "loss": 0.5806, + "step": 9750 + }, + { + "epoch": 0.69, + "grad_norm": 1.8200136531976747, + "learning_rate": 2.2888471101515194e-06, + "loss": 0.5022, + "step": 9751 + }, + { + "epoch": 0.69, + "grad_norm": 1.4780039394443454, + "learning_rate": 2.287881617515219e-06, + "loss": 0.5472, + "step": 9752 + }, + { + "epoch": 0.69, + "grad_norm": 1.9384819649968479, + "learning_rate": 2.2869162681422014e-06, + "loss": 0.5768, + "step": 9753 + }, + { + "epoch": 0.69, + "grad_norm": 1.5890684536589057, + "learning_rate": 2.2859510620834602e-06, + "loss": 0.4874, + "step": 9754 + }, + { + "epoch": 0.69, + "grad_norm": 1.838674774942663, + "learning_rate": 2.284985999389982e-06, + "loss": 0.5224, + "step": 9755 + }, + { + "epoch": 0.69, + "grad_norm": 1.9250016326617472, + "learning_rate": 2.2840210801127395e-06, + "loss": 0.5336, + "step": 9756 + }, + { + "epoch": 0.69, + "grad_norm": 1.5551144897928308, + "learning_rate": 2.2830563043027098e-06, + "loss": 0.5324, + "step": 9757 + }, + { + "epoch": 0.69, + "grad_norm": 1.61680023463662, + "learning_rate": 2.2820916720108505e-06, + "loss": 0.5218, + "step": 9758 + }, + { + "epoch": 0.69, + "grad_norm": 1.5204825376988291, + "learning_rate": 2.2811271832881188e-06, + "loss": 0.4943, + "step": 9759 + }, + { + "epoch": 0.69, + "grad_norm": 1.7471273883077791, + "learning_rate": 2.2801628381854624e-06, + "loss": 0.5246, + "step": 9760 + }, + { + "epoch": 0.69, + "grad_norm": 1.5317506001946553, + "learning_rate": 2.2791986367538206e-06, + "loss": 0.4605, + "step": 9761 + }, + { + "epoch": 0.69, + "grad_norm": 1.583891594547221, + "learning_rate": 2.278234579044128e-06, + "loss": 0.5387, + "step": 9762 + }, + { + "epoch": 0.69, + "grad_norm": 2.265264957433339, + "learning_rate": 2.2772706651073055e-06, + "loss": 0.5393, + "step": 9763 + }, + { + "epoch": 0.69, + "grad_norm": 1.8385113371723907, + "learning_rate": 2.276306894994273e-06, + "loss": 0.5051, + "step": 9764 + }, + { + "epoch": 0.69, + "grad_norm": 2.070098999658853, + "learning_rate": 2.275343268755939e-06, + "loss": 0.5193, + "step": 9765 + }, + { + "epoch": 0.69, + "grad_norm": 2.0134649446140824, + "learning_rate": 2.274379786443206e-06, + "loss": 0.5298, + "step": 9766 + }, + { + "epoch": 0.69, + "grad_norm": 1.6702191200538403, + "learning_rate": 2.27341644810697e-06, + "loss": 0.5681, + "step": 9767 + }, + { + "epoch": 0.69, + "grad_norm": 1.5769896075984415, + "learning_rate": 2.2724532537981126e-06, + "loss": 0.4636, + "step": 9768 + }, + { + "epoch": 0.69, + "grad_norm": 3.6695226923222353, + "learning_rate": 2.27149020356752e-06, + "loss": 0.5577, + "step": 9769 + }, + { + "epoch": 0.69, + "grad_norm": 1.8551965210392765, + "learning_rate": 2.270527297466059e-06, + "loss": 0.5056, + "step": 9770 + }, + { + "epoch": 0.69, + "grad_norm": 1.8481204115572398, + "learning_rate": 2.2695645355445965e-06, + "loss": 0.4663, + "step": 9771 + }, + { + "epoch": 0.69, + "grad_norm": 2.1514694618880834, + "learning_rate": 2.2686019178539835e-06, + "loss": 0.5646, + "step": 9772 + }, + { + "epoch": 0.69, + "grad_norm": 1.4693111647424204, + "learning_rate": 2.267639444445076e-06, + "loss": 0.4502, + "step": 9773 + }, + { + "epoch": 0.69, + "grad_norm": 2.0458428035768925, + "learning_rate": 2.2666771153687096e-06, + "loss": 0.4903, + "step": 9774 + }, + { + "epoch": 0.69, + "grad_norm": 1.5148752318899077, + "learning_rate": 2.2657149306757183e-06, + "loss": 0.4965, + "step": 9775 + }, + { + "epoch": 0.69, + "grad_norm": 1.6849122728551587, + "learning_rate": 2.2647528904169324e-06, + "loss": 0.5275, + "step": 9776 + }, + { + "epoch": 0.69, + "grad_norm": 1.7016994350821346, + "learning_rate": 2.2637909946431656e-06, + "loss": 0.5023, + "step": 9777 + }, + { + "epoch": 0.69, + "grad_norm": 3.9669850263693625, + "learning_rate": 2.2628292434052325e-06, + "loss": 0.4635, + "step": 9778 + }, + { + "epoch": 0.69, + "grad_norm": 1.8858119320989581, + "learning_rate": 2.261867636753929e-06, + "loss": 0.5326, + "step": 9779 + }, + { + "epoch": 0.69, + "grad_norm": 0.7157565542437598, + "learning_rate": 2.260906174740059e-06, + "loss": 0.437, + "step": 9780 + }, + { + "epoch": 0.69, + "grad_norm": 0.7202378924459611, + "learning_rate": 2.2599448574144047e-06, + "loss": 0.4231, + "step": 9781 + }, + { + "epoch": 0.69, + "grad_norm": 1.7952437531304084, + "learning_rate": 2.258983684827747e-06, + "loss": 0.4797, + "step": 9782 + }, + { + "epoch": 0.69, + "grad_norm": 1.7791493568955592, + "learning_rate": 2.258022657030859e-06, + "loss": 0.5464, + "step": 9783 + }, + { + "epoch": 0.69, + "grad_norm": 1.7566530186058351, + "learning_rate": 2.2570617740745054e-06, + "loss": 0.4491, + "step": 9784 + }, + { + "epoch": 0.69, + "grad_norm": 1.6333048071871863, + "learning_rate": 2.2561010360094447e-06, + "loss": 0.559, + "step": 9785 + }, + { + "epoch": 0.69, + "grad_norm": 1.5174857859245723, + "learning_rate": 2.2551404428864236e-06, + "loss": 0.4917, + "step": 9786 + }, + { + "epoch": 0.69, + "grad_norm": 2.1389021893128843, + "learning_rate": 2.2541799947561853e-06, + "loss": 0.4918, + "step": 9787 + }, + { + "epoch": 0.69, + "grad_norm": 1.686580714592363, + "learning_rate": 2.2532196916694633e-06, + "loss": 0.513, + "step": 9788 + }, + { + "epoch": 0.69, + "grad_norm": 1.6450877583967884, + "learning_rate": 2.252259533676985e-06, + "loss": 0.5612, + "step": 9789 + }, + { + "epoch": 0.69, + "grad_norm": 1.628652444908251, + "learning_rate": 2.2512995208294688e-06, + "loss": 0.4981, + "step": 9790 + }, + { + "epoch": 0.69, + "grad_norm": 1.8355199298065652, + "learning_rate": 2.250339653177625e-06, + "loss": 0.5068, + "step": 9791 + }, + { + "epoch": 0.69, + "grad_norm": 1.5534055191360703, + "learning_rate": 2.24937993077216e-06, + "loss": 0.5592, + "step": 9792 + }, + { + "epoch": 0.69, + "grad_norm": 1.7546093388778625, + "learning_rate": 2.248420353663766e-06, + "loss": 0.5508, + "step": 9793 + }, + { + "epoch": 0.7, + "grad_norm": 1.9418276595862947, + "learning_rate": 2.2474609219031325e-06, + "loss": 0.5614, + "step": 9794 + }, + { + "epoch": 0.7, + "grad_norm": 1.7659724092061488, + "learning_rate": 2.2465016355409395e-06, + "loss": 0.5244, + "step": 9795 + }, + { + "epoch": 0.7, + "grad_norm": 1.9078676817209506, + "learning_rate": 2.2455424946278603e-06, + "loss": 0.5138, + "step": 9796 + }, + { + "epoch": 0.7, + "grad_norm": 2.2238629544739674, + "learning_rate": 2.244583499214562e-06, + "loss": 0.5531, + "step": 9797 + }, + { + "epoch": 0.7, + "grad_norm": 1.6850235283148265, + "learning_rate": 2.243624649351696e-06, + "loss": 0.5203, + "step": 9798 + }, + { + "epoch": 0.7, + "grad_norm": 2.0103901501721877, + "learning_rate": 2.2426659450899203e-06, + "loss": 0.5142, + "step": 9799 + }, + { + "epoch": 0.7, + "grad_norm": 2.2203015186902033, + "learning_rate": 2.24170738647987e-06, + "loss": 0.526, + "step": 9800 + }, + { + "epoch": 0.7, + "grad_norm": 0.6935535403925185, + "learning_rate": 2.240748973572184e-06, + "loss": 0.4276, + "step": 9801 + }, + { + "epoch": 0.7, + "grad_norm": 2.20103154952227, + "learning_rate": 2.2397907064174827e-06, + "loss": 0.5519, + "step": 9802 + }, + { + "epoch": 0.7, + "grad_norm": 1.648189533844457, + "learning_rate": 2.2388325850663913e-06, + "loss": 0.5064, + "step": 9803 + }, + { + "epoch": 0.7, + "grad_norm": 1.9211950522476275, + "learning_rate": 2.2378746095695207e-06, + "loss": 0.5047, + "step": 9804 + }, + { + "epoch": 0.7, + "grad_norm": 1.7064761260042274, + "learning_rate": 2.236916779977469e-06, + "loss": 0.5075, + "step": 9805 + }, + { + "epoch": 0.7, + "grad_norm": 0.8344109204186879, + "learning_rate": 2.235959096340839e-06, + "loss": 0.4202, + "step": 9806 + }, + { + "epoch": 0.7, + "grad_norm": 1.5960044185409226, + "learning_rate": 2.2350015587102135e-06, + "loss": 0.5482, + "step": 9807 + }, + { + "epoch": 0.7, + "grad_norm": 1.7898930094525705, + "learning_rate": 2.2340441671361774e-06, + "loss": 0.6038, + "step": 9808 + }, + { + "epoch": 0.7, + "grad_norm": 0.7142109056622868, + "learning_rate": 2.2330869216692964e-06, + "loss": 0.4412, + "step": 9809 + }, + { + "epoch": 0.7, + "grad_norm": 1.663516948502248, + "learning_rate": 2.2321298223601433e-06, + "loss": 0.5167, + "step": 9810 + }, + { + "epoch": 0.7, + "grad_norm": 1.6814355294355856, + "learning_rate": 2.2311728692592705e-06, + "loss": 0.5294, + "step": 9811 + }, + { + "epoch": 0.7, + "grad_norm": 1.6692833489155774, + "learning_rate": 2.230216062417228e-06, + "loss": 0.5143, + "step": 9812 + }, + { + "epoch": 0.7, + "grad_norm": 2.548215115919644, + "learning_rate": 2.229259401884559e-06, + "loss": 0.5354, + "step": 9813 + }, + { + "epoch": 0.7, + "grad_norm": 0.8479091089072636, + "learning_rate": 2.228302887711797e-06, + "loss": 0.4266, + "step": 9814 + }, + { + "epoch": 0.7, + "grad_norm": 1.6494211725633305, + "learning_rate": 2.22734651994947e-06, + "loss": 0.4707, + "step": 9815 + }, + { + "epoch": 0.7, + "grad_norm": 1.6046435591145247, + "learning_rate": 2.226390298648093e-06, + "loss": 0.4934, + "step": 9816 + }, + { + "epoch": 0.7, + "grad_norm": 2.1102304856213485, + "learning_rate": 2.2254342238581787e-06, + "loss": 0.5227, + "step": 9817 + }, + { + "epoch": 0.7, + "grad_norm": 1.5254649715565505, + "learning_rate": 2.2244782956302304e-06, + "loss": 0.4821, + "step": 9818 + }, + { + "epoch": 0.7, + "grad_norm": 2.5853601028571447, + "learning_rate": 2.2235225140147427e-06, + "loss": 0.5365, + "step": 9819 + }, + { + "epoch": 0.7, + "grad_norm": 1.5959536400136145, + "learning_rate": 2.222566879062204e-06, + "loss": 0.5328, + "step": 9820 + }, + { + "epoch": 0.7, + "grad_norm": 1.9075411941456688, + "learning_rate": 2.221611390823094e-06, + "loss": 0.4509, + "step": 9821 + }, + { + "epoch": 0.7, + "grad_norm": 1.8467325386419042, + "learning_rate": 2.2206560493478873e-06, + "loss": 0.5421, + "step": 9822 + }, + { + "epoch": 0.7, + "grad_norm": 1.6682965002879802, + "learning_rate": 2.219700854687043e-06, + "loss": 0.5491, + "step": 9823 + }, + { + "epoch": 0.7, + "grad_norm": 0.69361826370397, + "learning_rate": 2.218745806891021e-06, + "loss": 0.43, + "step": 9824 + }, + { + "epoch": 0.7, + "grad_norm": 1.7758083775761881, + "learning_rate": 2.21779090601027e-06, + "loss": 0.5965, + "step": 9825 + }, + { + "epoch": 0.7, + "grad_norm": 1.5691503985185293, + "learning_rate": 2.2168361520952298e-06, + "loss": 0.5176, + "step": 9826 + }, + { + "epoch": 0.7, + "grad_norm": 1.605465088313817, + "learning_rate": 2.2158815451963373e-06, + "loss": 0.6051, + "step": 9827 + }, + { + "epoch": 0.7, + "grad_norm": 0.6824569229793276, + "learning_rate": 2.2149270853640116e-06, + "loss": 0.4446, + "step": 9828 + }, + { + "epoch": 0.7, + "grad_norm": 2.3279249262316486, + "learning_rate": 2.2139727726486775e-06, + "loss": 0.5632, + "step": 9829 + }, + { + "epoch": 0.7, + "grad_norm": 1.6630246621711562, + "learning_rate": 2.213018607100741e-06, + "loss": 0.5393, + "step": 9830 + }, + { + "epoch": 0.7, + "grad_norm": 2.153134548734179, + "learning_rate": 2.2120645887706046e-06, + "loss": 0.4906, + "step": 9831 + }, + { + "epoch": 0.7, + "grad_norm": 1.9403335042450074, + "learning_rate": 2.211110717708664e-06, + "loss": 0.5568, + "step": 9832 + }, + { + "epoch": 0.7, + "grad_norm": 1.6619787011954972, + "learning_rate": 2.2101569939653044e-06, + "loss": 0.4992, + "step": 9833 + }, + { + "epoch": 0.7, + "grad_norm": 1.9323816123535564, + "learning_rate": 2.2092034175909084e-06, + "loss": 0.5104, + "step": 9834 + }, + { + "epoch": 0.7, + "grad_norm": 0.7402635796299208, + "learning_rate": 2.208249988635842e-06, + "loss": 0.4316, + "step": 9835 + }, + { + "epoch": 0.7, + "grad_norm": 1.4628329571468286, + "learning_rate": 2.2072967071504712e-06, + "loss": 0.5113, + "step": 9836 + }, + { + "epoch": 0.7, + "grad_norm": 1.717266774038048, + "learning_rate": 2.206343573185151e-06, + "loss": 0.5407, + "step": 9837 + }, + { + "epoch": 0.7, + "grad_norm": 2.0788995618959807, + "learning_rate": 2.205390586790232e-06, + "loss": 0.5242, + "step": 9838 + }, + { + "epoch": 0.7, + "grad_norm": 2.111756982890185, + "learning_rate": 2.204437748016047e-06, + "loss": 0.5956, + "step": 9839 + }, + { + "epoch": 0.7, + "grad_norm": 1.5192303423506381, + "learning_rate": 2.2034850569129357e-06, + "loss": 0.5618, + "step": 9840 + }, + { + "epoch": 0.7, + "grad_norm": 1.6338007437450441, + "learning_rate": 2.2025325135312203e-06, + "loss": 0.5567, + "step": 9841 + }, + { + "epoch": 0.7, + "grad_norm": 1.5130919754535974, + "learning_rate": 2.2015801179212152e-06, + "loss": 0.5032, + "step": 9842 + }, + { + "epoch": 0.7, + "grad_norm": 1.6096667806755216, + "learning_rate": 2.2006278701332313e-06, + "loss": 0.5827, + "step": 9843 + }, + { + "epoch": 0.7, + "grad_norm": 1.6273355838135086, + "learning_rate": 2.199675770217568e-06, + "loss": 0.5956, + "step": 9844 + }, + { + "epoch": 0.7, + "grad_norm": 1.7929622625107424, + "learning_rate": 2.1987238182245218e-06, + "loss": 0.5004, + "step": 9845 + }, + { + "epoch": 0.7, + "grad_norm": 1.6298876602057937, + "learning_rate": 2.1977720142043737e-06, + "loss": 0.5019, + "step": 9846 + }, + { + "epoch": 0.7, + "grad_norm": 1.7730182950509759, + "learning_rate": 2.1968203582074026e-06, + "loss": 0.5146, + "step": 9847 + }, + { + "epoch": 0.7, + "grad_norm": 2.2336815756351047, + "learning_rate": 2.1958688502838787e-06, + "loss": 0.5444, + "step": 9848 + }, + { + "epoch": 0.7, + "grad_norm": 1.896585654591367, + "learning_rate": 2.194917490484064e-06, + "loss": 0.4952, + "step": 9849 + }, + { + "epoch": 0.7, + "grad_norm": 1.692158092086213, + "learning_rate": 2.1939662788582137e-06, + "loss": 0.5939, + "step": 9850 + }, + { + "epoch": 0.7, + "grad_norm": 1.7814729944899672, + "learning_rate": 2.1930152154565696e-06, + "loss": 0.5934, + "step": 9851 + }, + { + "epoch": 0.7, + "grad_norm": 1.6697437849235102, + "learning_rate": 2.1920643003293766e-06, + "loss": 0.466, + "step": 9852 + }, + { + "epoch": 0.7, + "grad_norm": 1.9675541085977006, + "learning_rate": 2.1911135335268608e-06, + "loss": 0.527, + "step": 9853 + }, + { + "epoch": 0.7, + "grad_norm": 1.7795176499679832, + "learning_rate": 2.190162915099245e-06, + "loss": 0.4953, + "step": 9854 + }, + { + "epoch": 0.7, + "grad_norm": 1.92656390973599, + "learning_rate": 2.189212445096745e-06, + "loss": 0.4927, + "step": 9855 + }, + { + "epoch": 0.7, + "grad_norm": 1.6510505905988975, + "learning_rate": 2.1882621235695685e-06, + "loss": 0.6125, + "step": 9856 + }, + { + "epoch": 0.7, + "grad_norm": 1.6943968810018013, + "learning_rate": 2.187311950567916e-06, + "loss": 0.49, + "step": 9857 + }, + { + "epoch": 0.7, + "grad_norm": 2.0429419409319003, + "learning_rate": 2.1863619261419726e-06, + "loss": 0.5593, + "step": 9858 + }, + { + "epoch": 0.7, + "grad_norm": 1.651692684158317, + "learning_rate": 2.18541205034193e-06, + "loss": 0.4789, + "step": 9859 + }, + { + "epoch": 0.7, + "grad_norm": 1.8091459818880926, + "learning_rate": 2.1844623232179583e-06, + "loss": 0.555, + "step": 9860 + }, + { + "epoch": 0.7, + "grad_norm": 1.9393682605731715, + "learning_rate": 2.1835127448202277e-06, + "loss": 0.5727, + "step": 9861 + }, + { + "epoch": 0.7, + "grad_norm": 1.8993530512545764, + "learning_rate": 2.1825633151988966e-06, + "loss": 0.5089, + "step": 9862 + }, + { + "epoch": 0.7, + "grad_norm": 0.6913309208285455, + "learning_rate": 2.181614034404118e-06, + "loss": 0.4378, + "step": 9863 + }, + { + "epoch": 0.7, + "grad_norm": 1.7115490938281233, + "learning_rate": 2.1806649024860386e-06, + "loss": 0.6026, + "step": 9864 + }, + { + "epoch": 0.7, + "grad_norm": 1.5739686785651636, + "learning_rate": 2.17971591949479e-06, + "loss": 0.5545, + "step": 9865 + }, + { + "epoch": 0.7, + "grad_norm": 0.796164169898495, + "learning_rate": 2.1787670854805033e-06, + "loss": 0.4479, + "step": 9866 + }, + { + "epoch": 0.7, + "grad_norm": 1.9650883356401943, + "learning_rate": 2.1778184004932984e-06, + "loss": 0.5535, + "step": 9867 + }, + { + "epoch": 0.7, + "grad_norm": 1.7500808946078343, + "learning_rate": 2.1768698645832883e-06, + "loss": 0.5131, + "step": 9868 + }, + { + "epoch": 0.7, + "grad_norm": 0.6794179121459604, + "learning_rate": 2.1759214778005784e-06, + "loss": 0.4105, + "step": 9869 + }, + { + "epoch": 0.7, + "grad_norm": 1.5967481894013247, + "learning_rate": 2.174973240195265e-06, + "loss": 0.5455, + "step": 9870 + }, + { + "epoch": 0.7, + "grad_norm": 2.2166597354677346, + "learning_rate": 2.1740251518174394e-06, + "loss": 0.5629, + "step": 9871 + }, + { + "epoch": 0.7, + "grad_norm": 1.7964576448836764, + "learning_rate": 2.1730772127171793e-06, + "loss": 0.4714, + "step": 9872 + }, + { + "epoch": 0.7, + "grad_norm": 1.784357296207718, + "learning_rate": 2.17212942294456e-06, + "loss": 0.5189, + "step": 9873 + }, + { + "epoch": 0.7, + "grad_norm": 1.7685191721911813, + "learning_rate": 2.1711817825496463e-06, + "loss": 0.6218, + "step": 9874 + }, + { + "epoch": 0.7, + "grad_norm": 1.7071092811119726, + "learning_rate": 2.170234291582498e-06, + "loss": 0.5367, + "step": 9875 + }, + { + "epoch": 0.7, + "grad_norm": 1.5919135967654028, + "learning_rate": 2.1692869500931618e-06, + "loss": 0.5247, + "step": 9876 + }, + { + "epoch": 0.7, + "grad_norm": 2.1371890611389315, + "learning_rate": 2.1683397581316802e-06, + "loss": 0.5324, + "step": 9877 + }, + { + "epoch": 0.7, + "grad_norm": 1.6230525319150255, + "learning_rate": 2.1673927157480874e-06, + "loss": 0.4736, + "step": 9878 + }, + { + "epoch": 0.7, + "grad_norm": 1.862684696291668, + "learning_rate": 2.1664458229924098e-06, + "loss": 0.5326, + "step": 9879 + }, + { + "epoch": 0.7, + "grad_norm": 1.4485560987420139, + "learning_rate": 2.165499079914667e-06, + "loss": 0.5291, + "step": 9880 + }, + { + "epoch": 0.7, + "grad_norm": 1.5512433332026951, + "learning_rate": 2.164552486564863e-06, + "loss": 0.4999, + "step": 9881 + }, + { + "epoch": 0.7, + "grad_norm": 1.7892125553676461, + "learning_rate": 2.1636060429930094e-06, + "loss": 0.581, + "step": 9882 + }, + { + "epoch": 0.7, + "grad_norm": 1.8407015730095222, + "learning_rate": 2.162659749249093e-06, + "loss": 0.5399, + "step": 9883 + }, + { + "epoch": 0.7, + "grad_norm": 2.0044684170019775, + "learning_rate": 2.1617136053831034e-06, + "loss": 0.5412, + "step": 9884 + }, + { + "epoch": 0.7, + "grad_norm": 1.5040287879269847, + "learning_rate": 2.1607676114450176e-06, + "loss": 0.5692, + "step": 9885 + }, + { + "epoch": 0.7, + "grad_norm": 1.8771342001352436, + "learning_rate": 2.159821767484808e-06, + "loss": 0.5771, + "step": 9886 + }, + { + "epoch": 0.7, + "grad_norm": 2.4342795875356713, + "learning_rate": 2.1588760735524384e-06, + "loss": 0.5541, + "step": 9887 + }, + { + "epoch": 0.7, + "grad_norm": 1.6926798274257615, + "learning_rate": 2.1579305296978575e-06, + "loss": 0.4983, + "step": 9888 + }, + { + "epoch": 0.7, + "grad_norm": 1.783006088232884, + "learning_rate": 2.15698513597102e-06, + "loss": 0.5558, + "step": 9889 + }, + { + "epoch": 0.7, + "grad_norm": 1.7616768180301434, + "learning_rate": 2.1560398924218593e-06, + "loss": 0.549, + "step": 9890 + }, + { + "epoch": 0.7, + "grad_norm": 1.5778620453987877, + "learning_rate": 2.1550947991003084e-06, + "loss": 0.5463, + "step": 9891 + }, + { + "epoch": 0.7, + "grad_norm": 1.7088188421028374, + "learning_rate": 2.1541498560562903e-06, + "loss": 0.4576, + "step": 9892 + }, + { + "epoch": 0.7, + "grad_norm": 1.7307619740476605, + "learning_rate": 2.1532050633397193e-06, + "loss": 0.5187, + "step": 9893 + }, + { + "epoch": 0.7, + "grad_norm": 1.530945891324916, + "learning_rate": 2.152260421000505e-06, + "loss": 0.4625, + "step": 9894 + }, + { + "epoch": 0.7, + "grad_norm": 1.7336063750900397, + "learning_rate": 2.1513159290885435e-06, + "loss": 0.5067, + "step": 9895 + }, + { + "epoch": 0.7, + "grad_norm": 1.8473967266720934, + "learning_rate": 2.150371587653726e-06, + "loss": 0.4936, + "step": 9896 + }, + { + "epoch": 0.7, + "grad_norm": 1.5198557951448186, + "learning_rate": 2.1494273967459383e-06, + "loss": 0.5426, + "step": 9897 + }, + { + "epoch": 0.7, + "grad_norm": 1.6220751413338879, + "learning_rate": 2.1484833564150542e-06, + "loss": 0.5206, + "step": 9898 + }, + { + "epoch": 0.7, + "grad_norm": 1.5808624273864196, + "learning_rate": 2.1475394667109435e-06, + "loss": 0.5054, + "step": 9899 + }, + { + "epoch": 0.7, + "grad_norm": 1.7050956412769964, + "learning_rate": 2.1465957276834592e-06, + "loss": 0.6037, + "step": 9900 + }, + { + "epoch": 0.7, + "grad_norm": 1.791247256625663, + "learning_rate": 2.1456521393824614e-06, + "loss": 0.4627, + "step": 9901 + }, + { + "epoch": 0.7, + "grad_norm": 4.210346089398442, + "learning_rate": 2.144708701857788e-06, + "loss": 0.521, + "step": 9902 + }, + { + "epoch": 0.7, + "grad_norm": 1.9144515467075263, + "learning_rate": 2.1437654151592754e-06, + "loss": 0.5148, + "step": 9903 + }, + { + "epoch": 0.7, + "grad_norm": 1.531238761544277, + "learning_rate": 2.142822279336752e-06, + "loss": 0.5856, + "step": 9904 + }, + { + "epoch": 0.7, + "grad_norm": 1.6302470641626148, + "learning_rate": 2.141879294440039e-06, + "loss": 0.522, + "step": 9905 + }, + { + "epoch": 0.7, + "grad_norm": 1.7149840605409237, + "learning_rate": 2.1409364605189447e-06, + "loss": 0.5395, + "step": 9906 + }, + { + "epoch": 0.7, + "grad_norm": 1.8506683524660588, + "learning_rate": 2.1399937776232727e-06, + "loss": 0.5091, + "step": 9907 + }, + { + "epoch": 0.7, + "grad_norm": 1.604209151273876, + "learning_rate": 2.1390512458028234e-06, + "loss": 0.5509, + "step": 9908 + }, + { + "epoch": 0.7, + "grad_norm": 1.549677829306699, + "learning_rate": 2.13810886510738e-06, + "loss": 0.4925, + "step": 9909 + }, + { + "epoch": 0.7, + "grad_norm": 0.6993336529375667, + "learning_rate": 2.137166635586726e-06, + "loss": 0.4504, + "step": 9910 + }, + { + "epoch": 0.7, + "grad_norm": 1.7255831385632994, + "learning_rate": 2.1362245572906263e-06, + "loss": 0.5201, + "step": 9911 + }, + { + "epoch": 0.7, + "grad_norm": 1.6915292071317485, + "learning_rate": 2.1352826302688538e-06, + "loss": 0.4856, + "step": 9912 + }, + { + "epoch": 0.7, + "grad_norm": 1.6425612857906209, + "learning_rate": 2.134340854571158e-06, + "loss": 0.5187, + "step": 9913 + }, + { + "epoch": 0.7, + "grad_norm": 2.983444695348457, + "learning_rate": 2.1333992302472876e-06, + "loss": 0.5638, + "step": 9914 + }, + { + "epoch": 0.7, + "grad_norm": 1.9646144756486192, + "learning_rate": 2.132457757346984e-06, + "loss": 0.5812, + "step": 9915 + }, + { + "epoch": 0.7, + "grad_norm": 1.9553779286653636, + "learning_rate": 2.131516435919978e-06, + "loss": 0.4756, + "step": 9916 + }, + { + "epoch": 0.7, + "grad_norm": 1.4336064972879234, + "learning_rate": 2.1305752660159956e-06, + "loss": 0.4705, + "step": 9917 + }, + { + "epoch": 0.7, + "grad_norm": 1.7200076168128053, + "learning_rate": 2.1296342476847467e-06, + "loss": 0.5299, + "step": 9918 + }, + { + "epoch": 0.7, + "grad_norm": 1.6441341656632302, + "learning_rate": 2.1286933809759465e-06, + "loss": 0.5767, + "step": 9919 + }, + { + "epoch": 0.7, + "grad_norm": 1.796907382138991, + "learning_rate": 2.1277526659392896e-06, + "loss": 0.5108, + "step": 9920 + }, + { + "epoch": 0.7, + "grad_norm": 1.6966933440186764, + "learning_rate": 2.1268121026244693e-06, + "loss": 0.5634, + "step": 9921 + }, + { + "epoch": 0.7, + "grad_norm": 1.8589131947080262, + "learning_rate": 2.1258716910811692e-06, + "loss": 0.5918, + "step": 9922 + }, + { + "epoch": 0.7, + "grad_norm": 1.941100492529861, + "learning_rate": 2.1249314313590657e-06, + "loss": 0.5784, + "step": 9923 + }, + { + "epoch": 0.7, + "grad_norm": 1.8299830228926899, + "learning_rate": 2.1239913235078275e-06, + "loss": 0.5999, + "step": 9924 + }, + { + "epoch": 0.7, + "grad_norm": 1.5845347392542977, + "learning_rate": 2.1230513675771114e-06, + "loss": 0.4894, + "step": 9925 + }, + { + "epoch": 0.7, + "grad_norm": 1.5827159938835709, + "learning_rate": 2.12211156361657e-06, + "loss": 0.5231, + "step": 9926 + }, + { + "epoch": 0.7, + "grad_norm": 1.743811289047712, + "learning_rate": 2.121171911675848e-06, + "loss": 0.5761, + "step": 9927 + }, + { + "epoch": 0.7, + "grad_norm": 1.8250149600021641, + "learning_rate": 2.1202324118045805e-06, + "loss": 0.531, + "step": 9928 + }, + { + "epoch": 0.7, + "grad_norm": 1.6651945715853556, + "learning_rate": 2.119293064052396e-06, + "loss": 0.5318, + "step": 9929 + }, + { + "epoch": 0.7, + "grad_norm": 1.5509718645062733, + "learning_rate": 2.1183538684689097e-06, + "loss": 0.5924, + "step": 9930 + }, + { + "epoch": 0.7, + "grad_norm": 0.6900881964781389, + "learning_rate": 2.117414825103741e-06, + "loss": 0.4445, + "step": 9931 + }, + { + "epoch": 0.7, + "grad_norm": 0.7442753839838445, + "learning_rate": 2.1164759340064854e-06, + "loss": 0.4433, + "step": 9932 + }, + { + "epoch": 0.7, + "grad_norm": 1.6131919110575181, + "learning_rate": 2.115537195226743e-06, + "loss": 0.5549, + "step": 9933 + }, + { + "epoch": 0.7, + "grad_norm": 1.394578529127927, + "learning_rate": 2.114598608814099e-06, + "loss": 0.4713, + "step": 9934 + }, + { + "epoch": 0.71, + "grad_norm": 1.901474818385091, + "learning_rate": 2.113660174818134e-06, + "loss": 0.5175, + "step": 9935 + }, + { + "epoch": 0.71, + "grad_norm": 1.7659107212231282, + "learning_rate": 2.1127218932884205e-06, + "loss": 0.5792, + "step": 9936 + }, + { + "epoch": 0.71, + "grad_norm": 1.8245121512516913, + "learning_rate": 2.1117837642745164e-06, + "loss": 0.5466, + "step": 9937 + }, + { + "epoch": 0.71, + "grad_norm": 1.8890652163218158, + "learning_rate": 2.110845787825984e-06, + "loss": 0.4439, + "step": 9938 + }, + { + "epoch": 0.71, + "grad_norm": 1.730373075654687, + "learning_rate": 2.1099079639923653e-06, + "loss": 0.547, + "step": 9939 + }, + { + "epoch": 0.71, + "grad_norm": 2.190909436660858, + "learning_rate": 2.1089702928232024e-06, + "loss": 0.4652, + "step": 9940 + }, + { + "epoch": 0.71, + "grad_norm": 1.5059507893783808, + "learning_rate": 2.108032774368021e-06, + "loss": 0.4661, + "step": 9941 + }, + { + "epoch": 0.71, + "grad_norm": 1.732801463097491, + "learning_rate": 2.1070954086763516e-06, + "loss": 0.5961, + "step": 9942 + }, + { + "epoch": 0.71, + "grad_norm": 0.7000885972452211, + "learning_rate": 2.1061581957977038e-06, + "loss": 0.4225, + "step": 9943 + }, + { + "epoch": 0.71, + "grad_norm": 18.11123632557978, + "learning_rate": 2.1052211357815856e-06, + "loss": 0.4908, + "step": 9944 + }, + { + "epoch": 0.71, + "grad_norm": 2.5322755475069454, + "learning_rate": 2.1042842286774958e-06, + "loss": 0.5598, + "step": 9945 + }, + { + "epoch": 0.71, + "grad_norm": 2.0519288271184726, + "learning_rate": 2.1033474745349253e-06, + "loss": 0.5944, + "step": 9946 + }, + { + "epoch": 0.71, + "grad_norm": 1.9316259934194702, + "learning_rate": 2.102410873403358e-06, + "loss": 0.5276, + "step": 9947 + }, + { + "epoch": 0.71, + "grad_norm": 1.8346813140060125, + "learning_rate": 2.1014744253322626e-06, + "loss": 0.5001, + "step": 9948 + }, + { + "epoch": 0.71, + "grad_norm": 1.9105347175477196, + "learning_rate": 2.1005381303711136e-06, + "loss": 0.6416, + "step": 9949 + }, + { + "epoch": 0.71, + "grad_norm": 1.9403395751476167, + "learning_rate": 2.0996019885693635e-06, + "loss": 0.4989, + "step": 9950 + }, + { + "epoch": 0.71, + "grad_norm": 1.5981535783815422, + "learning_rate": 2.0986659999764646e-06, + "loss": 0.4915, + "step": 9951 + }, + { + "epoch": 0.71, + "grad_norm": 2.942300733214206, + "learning_rate": 2.0977301646418587e-06, + "loss": 0.528, + "step": 9952 + }, + { + "epoch": 0.71, + "grad_norm": 1.969779062066473, + "learning_rate": 2.096794482614979e-06, + "loss": 0.4856, + "step": 9953 + }, + { + "epoch": 0.71, + "grad_norm": 1.687118497364785, + "learning_rate": 2.0958589539452547e-06, + "loss": 0.5738, + "step": 9954 + }, + { + "epoch": 0.71, + "grad_norm": 1.490783632983565, + "learning_rate": 2.094923578682098e-06, + "loss": 0.428, + "step": 9955 + }, + { + "epoch": 0.71, + "grad_norm": 2.0600156675526136, + "learning_rate": 2.093988356874923e-06, + "loss": 0.5557, + "step": 9956 + }, + { + "epoch": 0.71, + "grad_norm": 1.7654964953611865, + "learning_rate": 2.093053288573129e-06, + "loss": 0.5096, + "step": 9957 + }, + { + "epoch": 0.71, + "grad_norm": 1.7636164408026669, + "learning_rate": 2.0921183738261107e-06, + "loss": 0.5153, + "step": 9958 + }, + { + "epoch": 0.71, + "grad_norm": 1.6941360392035278, + "learning_rate": 2.091183612683255e-06, + "loss": 0.5498, + "step": 9959 + }, + { + "epoch": 0.71, + "grad_norm": 1.6616591593769634, + "learning_rate": 2.090249005193934e-06, + "loss": 0.5101, + "step": 9960 + }, + { + "epoch": 0.71, + "grad_norm": 1.6161933862388798, + "learning_rate": 2.0893145514075232e-06, + "loss": 0.4937, + "step": 9961 + }, + { + "epoch": 0.71, + "grad_norm": 0.6336901559186278, + "learning_rate": 2.088380251373379e-06, + "loss": 0.4138, + "step": 9962 + }, + { + "epoch": 0.71, + "grad_norm": 1.5739663939060524, + "learning_rate": 2.0874461051408556e-06, + "loss": 0.5507, + "step": 9963 + }, + { + "epoch": 0.71, + "grad_norm": 1.9702078571105015, + "learning_rate": 2.0865121127592987e-06, + "loss": 0.5827, + "step": 9964 + }, + { + "epoch": 0.71, + "grad_norm": 1.5380812570872537, + "learning_rate": 2.0855782742780434e-06, + "loss": 0.4696, + "step": 9965 + }, + { + "epoch": 0.71, + "grad_norm": 1.6077312661243806, + "learning_rate": 2.0846445897464213e-06, + "loss": 0.5261, + "step": 9966 + }, + { + "epoch": 0.71, + "grad_norm": 1.8894270389324255, + "learning_rate": 2.0837110592137467e-06, + "loss": 0.4776, + "step": 9967 + }, + { + "epoch": 0.71, + "grad_norm": 1.705222215828887, + "learning_rate": 2.0827776827293393e-06, + "loss": 0.4751, + "step": 9968 + }, + { + "epoch": 0.71, + "grad_norm": 1.6187646536575793, + "learning_rate": 2.0818444603424982e-06, + "loss": 0.4934, + "step": 9969 + }, + { + "epoch": 0.71, + "grad_norm": 1.6638889343405148, + "learning_rate": 2.080911392102522e-06, + "loss": 0.5269, + "step": 9970 + }, + { + "epoch": 0.71, + "grad_norm": 3.339380621470723, + "learning_rate": 2.079978478058694e-06, + "loss": 0.4774, + "step": 9971 + }, + { + "epoch": 0.71, + "grad_norm": 1.6601567588991704, + "learning_rate": 2.079045718260299e-06, + "loss": 0.5537, + "step": 9972 + }, + { + "epoch": 0.71, + "grad_norm": 1.5730684604889715, + "learning_rate": 2.0781131127566086e-06, + "loss": 0.4779, + "step": 9973 + }, + { + "epoch": 0.71, + "grad_norm": 1.859461940431205, + "learning_rate": 2.077180661596882e-06, + "loss": 0.4691, + "step": 9974 + }, + { + "epoch": 0.71, + "grad_norm": 2.614613212115766, + "learning_rate": 2.076248364830377e-06, + "loss": 0.4727, + "step": 9975 + }, + { + "epoch": 0.71, + "grad_norm": 1.5513138709835115, + "learning_rate": 2.0753162225063407e-06, + "loss": 0.5157, + "step": 9976 + }, + { + "epoch": 0.71, + "grad_norm": 1.9260043149397121, + "learning_rate": 2.0743842346740135e-06, + "loss": 0.5422, + "step": 9977 + }, + { + "epoch": 0.71, + "grad_norm": 1.7233955526042861, + "learning_rate": 2.073452401382622e-06, + "loss": 0.5393, + "step": 9978 + }, + { + "epoch": 0.71, + "grad_norm": 1.7340521529462616, + "learning_rate": 2.0725207226813915e-06, + "loss": 0.4698, + "step": 9979 + }, + { + "epoch": 0.71, + "grad_norm": 1.742981085043424, + "learning_rate": 2.0715891986195358e-06, + "loss": 0.5865, + "step": 9980 + }, + { + "epoch": 0.71, + "grad_norm": 0.6950640925626478, + "learning_rate": 2.070657829246262e-06, + "loss": 0.415, + "step": 9981 + }, + { + "epoch": 0.71, + "grad_norm": 1.7664743906897409, + "learning_rate": 2.069726614610767e-06, + "loss": 0.5493, + "step": 9982 + }, + { + "epoch": 0.71, + "grad_norm": 0.720725083725896, + "learning_rate": 2.0687955547622417e-06, + "loss": 0.4261, + "step": 9983 + }, + { + "epoch": 0.71, + "grad_norm": 0.6738589870009388, + "learning_rate": 2.0678646497498695e-06, + "loss": 0.4066, + "step": 9984 + }, + { + "epoch": 0.71, + "grad_norm": 1.6682323426659522, + "learning_rate": 2.06693389962282e-06, + "loss": 0.5078, + "step": 9985 + }, + { + "epoch": 0.71, + "grad_norm": 1.4430006479831332, + "learning_rate": 2.06600330443026e-06, + "loss": 0.4743, + "step": 9986 + }, + { + "epoch": 0.71, + "grad_norm": 1.6343820224980672, + "learning_rate": 2.0650728642213472e-06, + "loss": 0.5544, + "step": 9987 + }, + { + "epoch": 0.71, + "grad_norm": 1.695608677489692, + "learning_rate": 2.0641425790452314e-06, + "loss": 0.5506, + "step": 9988 + }, + { + "epoch": 0.71, + "grad_norm": 2.1445091623919015, + "learning_rate": 2.063212448951054e-06, + "loss": 0.4959, + "step": 9989 + }, + { + "epoch": 0.71, + "grad_norm": 1.6914704891935786, + "learning_rate": 2.062282473987943e-06, + "loss": 0.4977, + "step": 9990 + }, + { + "epoch": 0.71, + "grad_norm": 1.5501811386175672, + "learning_rate": 2.061352654205029e-06, + "loss": 0.4878, + "step": 9991 + }, + { + "epoch": 0.71, + "grad_norm": 1.9027966683377582, + "learning_rate": 2.060422989651424e-06, + "loss": 0.4886, + "step": 9992 + }, + { + "epoch": 0.71, + "grad_norm": 1.6721796303840772, + "learning_rate": 2.0594934803762368e-06, + "loss": 0.4935, + "step": 9993 + }, + { + "epoch": 0.71, + "grad_norm": 3.7335766028855946, + "learning_rate": 2.0585641264285684e-06, + "loss": 0.5438, + "step": 9994 + }, + { + "epoch": 0.71, + "grad_norm": 1.8491462149443747, + "learning_rate": 2.0576349278575092e-06, + "loss": 0.5706, + "step": 9995 + }, + { + "epoch": 0.71, + "grad_norm": 0.7512657107728453, + "learning_rate": 2.056705884712145e-06, + "loss": 0.4362, + "step": 9996 + }, + { + "epoch": 0.71, + "grad_norm": 1.6982321154788775, + "learning_rate": 2.0557769970415463e-06, + "loss": 0.4093, + "step": 9997 + }, + { + "epoch": 0.71, + "grad_norm": 1.6049880358389776, + "learning_rate": 2.054848264894786e-06, + "loss": 0.506, + "step": 9998 + }, + { + "epoch": 0.71, + "grad_norm": 1.6503151460510972, + "learning_rate": 2.0539196883209185e-06, + "loss": 0.5495, + "step": 9999 + }, + { + "epoch": 0.71, + "grad_norm": 0.755921830268919, + "learning_rate": 2.0529912673689958e-06, + "loss": 0.4594, + "step": 10000 + }, + { + "epoch": 0.71, + "grad_norm": 0.7185551331740226, + "learning_rate": 2.0520630020880593e-06, + "loss": 0.4763, + "step": 10001 + }, + { + "epoch": 0.71, + "grad_norm": 2.332056365899894, + "learning_rate": 2.0511348925271447e-06, + "loss": 0.4357, + "step": 10002 + }, + { + "epoch": 0.71, + "grad_norm": 1.9177014971361952, + "learning_rate": 2.050206938735279e-06, + "loss": 0.556, + "step": 10003 + }, + { + "epoch": 0.71, + "grad_norm": 1.6691278121121338, + "learning_rate": 2.0492791407614764e-06, + "loss": 0.5201, + "step": 10004 + }, + { + "epoch": 0.71, + "grad_norm": 1.764658783727552, + "learning_rate": 2.0483514986547478e-06, + "loss": 0.5243, + "step": 10005 + }, + { + "epoch": 0.71, + "grad_norm": 1.823389461878058, + "learning_rate": 2.0474240124640944e-06, + "loss": 0.5354, + "step": 10006 + }, + { + "epoch": 0.71, + "grad_norm": 1.4845856701293219, + "learning_rate": 2.046496682238511e-06, + "loss": 0.551, + "step": 10007 + }, + { + "epoch": 0.71, + "grad_norm": 1.691274862136385, + "learning_rate": 2.0455695080269796e-06, + "loss": 0.5133, + "step": 10008 + }, + { + "epoch": 0.71, + "grad_norm": 0.7206217222903164, + "learning_rate": 2.044642489878477e-06, + "loss": 0.44, + "step": 10009 + }, + { + "epoch": 0.71, + "grad_norm": 1.890543042895478, + "learning_rate": 2.043715627841973e-06, + "loss": 0.5277, + "step": 10010 + }, + { + "epoch": 0.71, + "grad_norm": 0.7391971754713759, + "learning_rate": 2.042788921966426e-06, + "loss": 0.4477, + "step": 10011 + }, + { + "epoch": 0.71, + "grad_norm": 1.4667832561720997, + "learning_rate": 2.041862372300791e-06, + "loss": 0.4574, + "step": 10012 + }, + { + "epoch": 0.71, + "grad_norm": 1.720710947376018, + "learning_rate": 2.040935978894005e-06, + "loss": 0.5395, + "step": 10013 + }, + { + "epoch": 0.71, + "grad_norm": 1.697987115620822, + "learning_rate": 2.040009741795011e-06, + "loss": 0.5375, + "step": 10014 + }, + { + "epoch": 0.71, + "grad_norm": 1.6094444439066218, + "learning_rate": 2.0390836610527304e-06, + "loss": 0.5655, + "step": 10015 + }, + { + "epoch": 0.71, + "grad_norm": 1.6892549418071816, + "learning_rate": 2.0381577367160836e-06, + "loss": 0.5061, + "step": 10016 + }, + { + "epoch": 0.71, + "grad_norm": 1.6895879876227233, + "learning_rate": 2.037231968833982e-06, + "loss": 0.5359, + "step": 10017 + }, + { + "epoch": 0.71, + "grad_norm": 2.5068357294279298, + "learning_rate": 2.036306357455326e-06, + "loss": 0.5589, + "step": 10018 + }, + { + "epoch": 0.71, + "grad_norm": 2.030089620057589, + "learning_rate": 2.0353809026290134e-06, + "loss": 0.5161, + "step": 10019 + }, + { + "epoch": 0.71, + "grad_norm": 1.9121564902961647, + "learning_rate": 2.0344556044039225e-06, + "loss": 0.4851, + "step": 10020 + }, + { + "epoch": 0.71, + "grad_norm": 1.7450171086894917, + "learning_rate": 2.0335304628289393e-06, + "loss": 0.5286, + "step": 10021 + }, + { + "epoch": 0.71, + "grad_norm": 1.8591463052626689, + "learning_rate": 2.032605477952927e-06, + "loss": 0.5616, + "step": 10022 + }, + { + "epoch": 0.71, + "grad_norm": 1.733764481955262, + "learning_rate": 2.0316806498247483e-06, + "loss": 0.4767, + "step": 10023 + }, + { + "epoch": 0.71, + "grad_norm": 0.7409600811666619, + "learning_rate": 2.0307559784932555e-06, + "loss": 0.4413, + "step": 10024 + }, + { + "epoch": 0.71, + "grad_norm": 1.6508008317646463, + "learning_rate": 2.029831464007293e-06, + "loss": 0.4937, + "step": 10025 + }, + { + "epoch": 0.71, + "grad_norm": 1.6260427651307787, + "learning_rate": 2.028907106415699e-06, + "loss": 0.5697, + "step": 10026 + }, + { + "epoch": 0.71, + "grad_norm": 1.5445609987650661, + "learning_rate": 2.027982905767297e-06, + "loss": 0.4651, + "step": 10027 + }, + { + "epoch": 0.71, + "grad_norm": 1.5468760798195007, + "learning_rate": 2.027058862110909e-06, + "loss": 0.5172, + "step": 10028 + }, + { + "epoch": 0.71, + "grad_norm": 1.7196611156189003, + "learning_rate": 2.026134975495345e-06, + "loss": 0.499, + "step": 10029 + }, + { + "epoch": 0.71, + "grad_norm": 1.6681550892510362, + "learning_rate": 2.0252112459694086e-06, + "loss": 0.4596, + "step": 10030 + }, + { + "epoch": 0.71, + "grad_norm": 1.7160117676270987, + "learning_rate": 2.0242876735818946e-06, + "loss": 0.5368, + "step": 10031 + }, + { + "epoch": 0.71, + "grad_norm": 1.9305106955863673, + "learning_rate": 2.023364258381588e-06, + "loss": 0.4537, + "step": 10032 + }, + { + "epoch": 0.71, + "grad_norm": 1.658694782263401, + "learning_rate": 2.0224410004172696e-06, + "loss": 0.5356, + "step": 10033 + }, + { + "epoch": 0.71, + "grad_norm": 0.7106424205017708, + "learning_rate": 2.0215178997377054e-06, + "loss": 0.4337, + "step": 10034 + }, + { + "epoch": 0.71, + "grad_norm": 1.707212101921754, + "learning_rate": 2.0205949563916575e-06, + "loss": 0.5301, + "step": 10035 + }, + { + "epoch": 0.71, + "grad_norm": 1.4546355569917444, + "learning_rate": 2.0196721704278804e-06, + "loss": 0.4561, + "step": 10036 + }, + { + "epoch": 0.71, + "grad_norm": 1.56270525769816, + "learning_rate": 2.0187495418951197e-06, + "loss": 0.4868, + "step": 10037 + }, + { + "epoch": 0.71, + "grad_norm": 1.7915481983474573, + "learning_rate": 2.0178270708421076e-06, + "loss": 0.5415, + "step": 10038 + }, + { + "epoch": 0.71, + "grad_norm": 2.2579044375118738, + "learning_rate": 2.0169047573175733e-06, + "loss": 0.5625, + "step": 10039 + }, + { + "epoch": 0.71, + "grad_norm": 0.6889215472110888, + "learning_rate": 2.0159826013702416e-06, + "loss": 0.4206, + "step": 10040 + }, + { + "epoch": 0.71, + "grad_norm": 2.513439337019751, + "learning_rate": 2.015060603048818e-06, + "loss": 0.5577, + "step": 10041 + }, + { + "epoch": 0.71, + "grad_norm": 1.7193556687057865, + "learning_rate": 2.0141387624020096e-06, + "loss": 0.4562, + "step": 10042 + }, + { + "epoch": 0.71, + "grad_norm": 3.0039910407030352, + "learning_rate": 2.0132170794785057e-06, + "loss": 0.5261, + "step": 10043 + }, + { + "epoch": 0.71, + "grad_norm": 2.0860928941872263, + "learning_rate": 2.0122955543269996e-06, + "loss": 0.5415, + "step": 10044 + }, + { + "epoch": 0.71, + "grad_norm": 2.519010682111012, + "learning_rate": 2.0113741869961646e-06, + "loss": 0.5504, + "step": 10045 + }, + { + "epoch": 0.71, + "grad_norm": 1.7348591207037212, + "learning_rate": 2.0104529775346714e-06, + "loss": 0.5456, + "step": 10046 + }, + { + "epoch": 0.71, + "grad_norm": 0.680252512283667, + "learning_rate": 2.0095319259911824e-06, + "loss": 0.4059, + "step": 10047 + }, + { + "epoch": 0.71, + "grad_norm": 2.1906381331267064, + "learning_rate": 2.00861103241435e-06, + "loss": 0.5123, + "step": 10048 + }, + { + "epoch": 0.71, + "grad_norm": 1.6990731157674086, + "learning_rate": 2.007690296852821e-06, + "loss": 0.5451, + "step": 10049 + }, + { + "epoch": 0.71, + "grad_norm": 2.329274581098137, + "learning_rate": 2.006769719355226e-06, + "loss": 0.5085, + "step": 10050 + }, + { + "epoch": 0.71, + "grad_norm": 2.145496350510256, + "learning_rate": 2.0058492999702006e-06, + "loss": 0.5865, + "step": 10051 + }, + { + "epoch": 0.71, + "grad_norm": 9.610698936107974, + "learning_rate": 2.004929038746359e-06, + "loss": 0.5431, + "step": 10052 + }, + { + "epoch": 0.71, + "grad_norm": 1.643859131999563, + "learning_rate": 2.004008935732314e-06, + "loss": 0.5535, + "step": 10053 + }, + { + "epoch": 0.71, + "grad_norm": 1.7835010202990726, + "learning_rate": 2.0030889909766692e-06, + "loss": 0.4887, + "step": 10054 + }, + { + "epoch": 0.71, + "grad_norm": 1.4438750642282292, + "learning_rate": 2.0021692045280185e-06, + "loss": 0.4685, + "step": 10055 + }, + { + "epoch": 0.71, + "grad_norm": 0.6856412906292524, + "learning_rate": 2.0012495764349504e-06, + "loss": 0.4246, + "step": 10056 + }, + { + "epoch": 0.71, + "grad_norm": 1.708480894805625, + "learning_rate": 2.000330106746039e-06, + "loss": 0.5282, + "step": 10057 + }, + { + "epoch": 0.71, + "grad_norm": 1.597876649783731, + "learning_rate": 1.9994107955098556e-06, + "loss": 0.489, + "step": 10058 + }, + { + "epoch": 0.71, + "grad_norm": 1.637834250722319, + "learning_rate": 1.9984916427749614e-06, + "loss": 0.5034, + "step": 10059 + }, + { + "epoch": 0.71, + "grad_norm": 0.7198341267726184, + "learning_rate": 1.997572648589909e-06, + "loss": 0.4348, + "step": 10060 + }, + { + "epoch": 0.71, + "grad_norm": 1.6196059487939933, + "learning_rate": 1.9966538130032432e-06, + "loss": 0.5687, + "step": 10061 + }, + { + "epoch": 0.71, + "grad_norm": 1.8433459073082177, + "learning_rate": 1.9957351360634997e-06, + "loss": 0.4837, + "step": 10062 + }, + { + "epoch": 0.71, + "grad_norm": 1.5571865193298435, + "learning_rate": 1.9948166178192075e-06, + "loss": 0.4866, + "step": 10063 + }, + { + "epoch": 0.71, + "grad_norm": 1.6026021881759482, + "learning_rate": 1.9938982583188832e-06, + "loss": 0.4646, + "step": 10064 + }, + { + "epoch": 0.71, + "grad_norm": 1.8695350842122758, + "learning_rate": 1.9929800576110388e-06, + "loss": 0.5297, + "step": 10065 + }, + { + "epoch": 0.71, + "grad_norm": 1.8618913718011791, + "learning_rate": 1.992062015744177e-06, + "loss": 0.5668, + "step": 10066 + }, + { + "epoch": 0.71, + "grad_norm": 0.662078744700633, + "learning_rate": 1.9911441327667913e-06, + "loss": 0.3905, + "step": 10067 + }, + { + "epoch": 0.71, + "grad_norm": 1.5209710953154347, + "learning_rate": 1.9902264087273706e-06, + "loss": 0.517, + "step": 10068 + }, + { + "epoch": 0.71, + "grad_norm": 1.579016550775007, + "learning_rate": 1.9893088436743853e-06, + "loss": 0.5323, + "step": 10069 + }, + { + "epoch": 0.71, + "grad_norm": 2.396054010838562, + "learning_rate": 1.9883914376563117e-06, + "loss": 0.5014, + "step": 10070 + }, + { + "epoch": 0.71, + "grad_norm": 1.6429848932711697, + "learning_rate": 1.9874741907216062e-06, + "loss": 0.4736, + "step": 10071 + }, + { + "epoch": 0.71, + "grad_norm": 1.6153335841768872, + "learning_rate": 1.986557102918723e-06, + "loss": 0.5303, + "step": 10072 + }, + { + "epoch": 0.71, + "grad_norm": 1.5355684566033818, + "learning_rate": 1.985640174296101e-06, + "loss": 0.4801, + "step": 10073 + }, + { + "epoch": 0.71, + "grad_norm": 1.6312232181855317, + "learning_rate": 1.984723404902183e-06, + "loss": 0.5701, + "step": 10074 + }, + { + "epoch": 0.71, + "grad_norm": 1.8490795624029488, + "learning_rate": 1.9838067947853895e-06, + "loss": 0.4548, + "step": 10075 + }, + { + "epoch": 0.72, + "grad_norm": 1.731555757474522, + "learning_rate": 1.982890343994142e-06, + "loss": 0.4979, + "step": 10076 + }, + { + "epoch": 0.72, + "grad_norm": 1.4902044317430612, + "learning_rate": 1.9819740525768495e-06, + "loss": 0.5137, + "step": 10077 + }, + { + "epoch": 0.72, + "grad_norm": 2.2470842006939598, + "learning_rate": 1.981057920581914e-06, + "loss": 0.5374, + "step": 10078 + }, + { + "epoch": 0.72, + "grad_norm": 1.6561644236104762, + "learning_rate": 1.9801419480577312e-06, + "loss": 0.5384, + "step": 10079 + }, + { + "epoch": 0.72, + "grad_norm": 2.116363150450398, + "learning_rate": 1.9792261350526788e-06, + "loss": 0.5394, + "step": 10080 + }, + { + "epoch": 0.72, + "grad_norm": 1.6473487288245503, + "learning_rate": 1.9783104816151416e-06, + "loss": 0.5394, + "step": 10081 + }, + { + "epoch": 0.72, + "grad_norm": 1.6328414660031356, + "learning_rate": 1.9773949877934823e-06, + "loss": 0.5535, + "step": 10082 + }, + { + "epoch": 0.72, + "grad_norm": 1.513138516992385, + "learning_rate": 1.9764796536360614e-06, + "loss": 0.5734, + "step": 10083 + }, + { + "epoch": 0.72, + "grad_norm": 0.6887886949931826, + "learning_rate": 1.9755644791912306e-06, + "loss": 0.4091, + "step": 10084 + }, + { + "epoch": 0.72, + "grad_norm": 1.7335834319728325, + "learning_rate": 1.9746494645073316e-06, + "loss": 0.5637, + "step": 10085 + }, + { + "epoch": 0.72, + "grad_norm": 1.74170071733523, + "learning_rate": 1.973734609632701e-06, + "loss": 0.5192, + "step": 10086 + }, + { + "epoch": 0.72, + "grad_norm": 2.4174786624232327, + "learning_rate": 1.9728199146156613e-06, + "loss": 0.4703, + "step": 10087 + }, + { + "epoch": 0.72, + "grad_norm": 1.8114084093920053, + "learning_rate": 1.971905379504531e-06, + "loss": 0.5336, + "step": 10088 + }, + { + "epoch": 0.72, + "grad_norm": 1.671948551175786, + "learning_rate": 1.970991004347619e-06, + "loss": 0.452, + "step": 10089 + }, + { + "epoch": 0.72, + "grad_norm": 1.6322878195638608, + "learning_rate": 1.9700767891932264e-06, + "loss": 0.4871, + "step": 10090 + }, + { + "epoch": 0.72, + "grad_norm": 2.1992140648607155, + "learning_rate": 1.9691627340896456e-06, + "loss": 0.4972, + "step": 10091 + }, + { + "epoch": 0.72, + "grad_norm": 1.6719171978075562, + "learning_rate": 1.9682488390851563e-06, + "loss": 0.4982, + "step": 10092 + }, + { + "epoch": 0.72, + "grad_norm": 1.91694735454238, + "learning_rate": 1.967335104228039e-06, + "loss": 0.5145, + "step": 10093 + }, + { + "epoch": 0.72, + "grad_norm": 1.5409815435994731, + "learning_rate": 1.9664215295665566e-06, + "loss": 0.5419, + "step": 10094 + }, + { + "epoch": 0.72, + "grad_norm": 1.6420082185492388, + "learning_rate": 1.965508115148968e-06, + "loss": 0.5938, + "step": 10095 + }, + { + "epoch": 0.72, + "grad_norm": 1.7435130421164953, + "learning_rate": 1.9645948610235226e-06, + "loss": 0.4849, + "step": 10096 + }, + { + "epoch": 0.72, + "grad_norm": 1.7898826957947112, + "learning_rate": 1.963681767238462e-06, + "loss": 0.4675, + "step": 10097 + }, + { + "epoch": 0.72, + "grad_norm": 4.326302976313969, + "learning_rate": 1.9627688338420215e-06, + "loss": 0.5551, + "step": 10098 + }, + { + "epoch": 0.72, + "grad_norm": 1.7631136985024314, + "learning_rate": 1.9618560608824182e-06, + "loss": 0.5291, + "step": 10099 + }, + { + "epoch": 0.72, + "grad_norm": 0.7686811039016653, + "learning_rate": 1.9609434484078766e-06, + "loss": 0.423, + "step": 10100 + }, + { + "epoch": 0.72, + "grad_norm": 2.1277402714003295, + "learning_rate": 1.9600309964665975e-06, + "loss": 0.5928, + "step": 10101 + }, + { + "epoch": 0.72, + "grad_norm": 1.8327210176143773, + "learning_rate": 1.9591187051067837e-06, + "loss": 0.5669, + "step": 10102 + }, + { + "epoch": 0.72, + "grad_norm": 1.6169882192816916, + "learning_rate": 1.958206574376621e-06, + "loss": 0.5169, + "step": 10103 + }, + { + "epoch": 0.72, + "grad_norm": 1.5658756958339162, + "learning_rate": 1.9572946043242952e-06, + "loss": 0.4979, + "step": 10104 + }, + { + "epoch": 0.72, + "grad_norm": 0.7378599888194799, + "learning_rate": 1.9563827949979806e-06, + "loss": 0.4229, + "step": 10105 + }, + { + "epoch": 0.72, + "grad_norm": 1.8160774046571133, + "learning_rate": 1.9554711464458382e-06, + "loss": 0.486, + "step": 10106 + }, + { + "epoch": 0.72, + "grad_norm": 0.7182979131187212, + "learning_rate": 1.954559658716026e-06, + "loss": 0.4202, + "step": 10107 + }, + { + "epoch": 0.72, + "grad_norm": 1.9079709123471837, + "learning_rate": 1.953648331856692e-06, + "loss": 0.5314, + "step": 10108 + }, + { + "epoch": 0.72, + "grad_norm": 1.7943206597427712, + "learning_rate": 1.952737165915977e-06, + "loss": 0.5522, + "step": 10109 + }, + { + "epoch": 0.72, + "grad_norm": 1.662380167528331, + "learning_rate": 1.9518261609420075e-06, + "loss": 0.5353, + "step": 10110 + }, + { + "epoch": 0.72, + "grad_norm": 1.7030795007923176, + "learning_rate": 1.950915316982912e-06, + "loss": 0.5456, + "step": 10111 + }, + { + "epoch": 0.72, + "grad_norm": 1.770626312492131, + "learning_rate": 1.950004634086799e-06, + "loss": 0.5915, + "step": 10112 + }, + { + "epoch": 0.72, + "grad_norm": 1.771650854103482, + "learning_rate": 1.9490941123017766e-06, + "loss": 0.5233, + "step": 10113 + }, + { + "epoch": 0.72, + "grad_norm": 1.958356019497405, + "learning_rate": 1.948183751675941e-06, + "loss": 0.5672, + "step": 10114 + }, + { + "epoch": 0.72, + "grad_norm": 1.8554772181892192, + "learning_rate": 1.9472735522573805e-06, + "loss": 0.5028, + "step": 10115 + }, + { + "epoch": 0.72, + "grad_norm": 2.216694865231501, + "learning_rate": 1.9463635140941766e-06, + "loss": 0.5235, + "step": 10116 + }, + { + "epoch": 0.72, + "grad_norm": 0.7005744679959377, + "learning_rate": 1.9454536372343974e-06, + "loss": 0.4374, + "step": 10117 + }, + { + "epoch": 0.72, + "grad_norm": 1.7787420612367, + "learning_rate": 1.9445439217261073e-06, + "loss": 0.4732, + "step": 10118 + }, + { + "epoch": 0.72, + "grad_norm": 2.5364020820231348, + "learning_rate": 1.943634367617361e-06, + "loss": 0.5545, + "step": 10119 + }, + { + "epoch": 0.72, + "grad_norm": 1.8722586791997047, + "learning_rate": 1.9427249749562034e-06, + "loss": 0.5495, + "step": 10120 + }, + { + "epoch": 0.72, + "grad_norm": 1.742561440776197, + "learning_rate": 1.9418157437906737e-06, + "loss": 0.5315, + "step": 10121 + }, + { + "epoch": 0.72, + "grad_norm": 1.7082117290891496, + "learning_rate": 1.9409066741687952e-06, + "loss": 0.5498, + "step": 10122 + }, + { + "epoch": 0.72, + "grad_norm": 2.0873933150457162, + "learning_rate": 1.939997766138596e-06, + "loss": 0.5089, + "step": 10123 + }, + { + "epoch": 0.72, + "grad_norm": 0.751027603485962, + "learning_rate": 1.939089019748081e-06, + "loss": 0.4586, + "step": 10124 + }, + { + "epoch": 0.72, + "grad_norm": 1.7596588651751137, + "learning_rate": 1.9381804350452568e-06, + "loss": 0.51, + "step": 10125 + }, + { + "epoch": 0.72, + "grad_norm": 1.7916081893083429, + "learning_rate": 1.9372720120781157e-06, + "loss": 0.5196, + "step": 10126 + }, + { + "epoch": 0.72, + "grad_norm": 1.9077171813890403, + "learning_rate": 1.9363637508946457e-06, + "loss": 0.5213, + "step": 10127 + }, + { + "epoch": 0.72, + "grad_norm": 1.7867421371152294, + "learning_rate": 1.9354556515428246e-06, + "loss": 0.5583, + "step": 10128 + }, + { + "epoch": 0.72, + "grad_norm": 1.9534052686920502, + "learning_rate": 1.934547714070617e-06, + "loss": 0.5441, + "step": 10129 + }, + { + "epoch": 0.72, + "grad_norm": 1.7497997552070477, + "learning_rate": 1.9336399385259895e-06, + "loss": 0.5401, + "step": 10130 + }, + { + "epoch": 0.72, + "grad_norm": 2.1942277021391328, + "learning_rate": 1.932732324956889e-06, + "loss": 0.4843, + "step": 10131 + }, + { + "epoch": 0.72, + "grad_norm": 1.5942516172839618, + "learning_rate": 1.931824873411261e-06, + "loss": 0.5039, + "step": 10132 + }, + { + "epoch": 0.72, + "grad_norm": 1.5365465662896785, + "learning_rate": 1.9309175839370386e-06, + "loss": 0.5523, + "step": 10133 + }, + { + "epoch": 0.72, + "grad_norm": 0.7323923824634078, + "learning_rate": 1.9300104565821496e-06, + "loss": 0.449, + "step": 10134 + }, + { + "epoch": 0.72, + "grad_norm": 2.3253882496820055, + "learning_rate": 1.9291034913945123e-06, + "loss": 0.5176, + "step": 10135 + }, + { + "epoch": 0.72, + "grad_norm": 1.9088634584458906, + "learning_rate": 1.9281966884220328e-06, + "loss": 0.5509, + "step": 10136 + }, + { + "epoch": 0.72, + "grad_norm": 1.7222852678813565, + "learning_rate": 1.9272900477126124e-06, + "loss": 0.5778, + "step": 10137 + }, + { + "epoch": 0.72, + "grad_norm": 1.7752371309653752, + "learning_rate": 1.9263835693141437e-06, + "loss": 0.4685, + "step": 10138 + }, + { + "epoch": 0.72, + "grad_norm": 1.4778273443597498, + "learning_rate": 1.9254772532745115e-06, + "loss": 0.503, + "step": 10139 + }, + { + "epoch": 0.72, + "grad_norm": 2.5236091299627534, + "learning_rate": 1.924571099641587e-06, + "loss": 0.5249, + "step": 10140 + }, + { + "epoch": 0.72, + "grad_norm": 1.708836024408149, + "learning_rate": 1.923665108463237e-06, + "loss": 0.5083, + "step": 10141 + }, + { + "epoch": 0.72, + "grad_norm": 1.9080344861443752, + "learning_rate": 1.922759279787321e-06, + "loss": 0.5881, + "step": 10142 + }, + { + "epoch": 0.72, + "grad_norm": 1.9134671306375153, + "learning_rate": 1.9218536136616873e-06, + "loss": 0.5182, + "step": 10143 + }, + { + "epoch": 0.72, + "grad_norm": 2.4752790991383797, + "learning_rate": 1.9209481101341755e-06, + "loss": 0.5606, + "step": 10144 + }, + { + "epoch": 0.72, + "grad_norm": 1.4938728223762303, + "learning_rate": 1.9200427692526173e-06, + "loss": 0.4832, + "step": 10145 + }, + { + "epoch": 0.72, + "grad_norm": 1.6072015107777136, + "learning_rate": 1.9191375910648387e-06, + "loss": 0.5295, + "step": 10146 + }, + { + "epoch": 0.72, + "grad_norm": 1.6036691883646164, + "learning_rate": 1.91823257561865e-06, + "loss": 0.5412, + "step": 10147 + }, + { + "epoch": 0.72, + "grad_norm": 1.828480673427384, + "learning_rate": 1.917327722961859e-06, + "loss": 0.4797, + "step": 10148 + }, + { + "epoch": 0.72, + "grad_norm": 0.6518286401008918, + "learning_rate": 1.9164230331422634e-06, + "loss": 0.3946, + "step": 10149 + }, + { + "epoch": 0.72, + "grad_norm": 1.7657883900451388, + "learning_rate": 1.9155185062076515e-06, + "loss": 0.4525, + "step": 10150 + }, + { + "epoch": 0.72, + "grad_norm": 1.6791301265887801, + "learning_rate": 1.914614142205806e-06, + "loss": 0.5009, + "step": 10151 + }, + { + "epoch": 0.72, + "grad_norm": 1.6524123705823408, + "learning_rate": 1.913709941184492e-06, + "loss": 0.5452, + "step": 10152 + }, + { + "epoch": 0.72, + "grad_norm": 1.7018087073829087, + "learning_rate": 1.9128059031914807e-06, + "loss": 0.5461, + "step": 10153 + }, + { + "epoch": 0.72, + "grad_norm": 1.6637716799976487, + "learning_rate": 1.9119020282745204e-06, + "loss": 0.5214, + "step": 10154 + }, + { + "epoch": 0.72, + "grad_norm": 1.6196061284180758, + "learning_rate": 1.910998316481359e-06, + "loss": 0.5278, + "step": 10155 + }, + { + "epoch": 0.72, + "grad_norm": 0.7913205202441055, + "learning_rate": 1.9100947678597337e-06, + "loss": 0.46, + "step": 10156 + }, + { + "epoch": 0.72, + "grad_norm": 1.637925521715112, + "learning_rate": 1.9091913824573725e-06, + "loss": 0.5267, + "step": 10157 + }, + { + "epoch": 0.72, + "grad_norm": 1.959301486403192, + "learning_rate": 1.9082881603219973e-06, + "loss": 0.558, + "step": 10158 + }, + { + "epoch": 0.72, + "grad_norm": 2.058616101165977, + "learning_rate": 1.9073851015013145e-06, + "loss": 0.5742, + "step": 10159 + }, + { + "epoch": 0.72, + "grad_norm": 1.7680816420441705, + "learning_rate": 1.9064822060430328e-06, + "loss": 0.4987, + "step": 10160 + }, + { + "epoch": 0.72, + "grad_norm": 1.9060046144832865, + "learning_rate": 1.9055794739948419e-06, + "loss": 0.5272, + "step": 10161 + }, + { + "epoch": 0.72, + "grad_norm": 1.4930931863712387, + "learning_rate": 1.9046769054044283e-06, + "loss": 0.4159, + "step": 10162 + }, + { + "epoch": 0.72, + "grad_norm": 0.659868167027125, + "learning_rate": 1.903774500319469e-06, + "loss": 0.437, + "step": 10163 + }, + { + "epoch": 0.72, + "grad_norm": 1.9145983095290537, + "learning_rate": 1.902872258787632e-06, + "loss": 0.5116, + "step": 10164 + }, + { + "epoch": 0.72, + "grad_norm": 1.878534051335053, + "learning_rate": 1.9019701808565788e-06, + "loss": 0.5627, + "step": 10165 + }, + { + "epoch": 0.72, + "grad_norm": 1.6790690727868454, + "learning_rate": 1.901068266573956e-06, + "loss": 0.4999, + "step": 10166 + }, + { + "epoch": 0.72, + "grad_norm": 1.4415355729968105, + "learning_rate": 1.9001665159874083e-06, + "loss": 0.4524, + "step": 10167 + }, + { + "epoch": 0.72, + "grad_norm": 1.4724404168120528, + "learning_rate": 1.8992649291445692e-06, + "loss": 0.5071, + "step": 10168 + }, + { + "epoch": 0.72, + "grad_norm": 1.745829776376896, + "learning_rate": 1.8983635060930644e-06, + "loss": 0.5316, + "step": 10169 + }, + { + "epoch": 0.72, + "grad_norm": 1.9979843200522793, + "learning_rate": 1.8974622468805076e-06, + "loss": 0.4738, + "step": 10170 + }, + { + "epoch": 0.72, + "grad_norm": 1.525709734670125, + "learning_rate": 1.8965611515545056e-06, + "loss": 0.5006, + "step": 10171 + }, + { + "epoch": 0.72, + "grad_norm": 2.179159897753943, + "learning_rate": 1.8956602201626634e-06, + "loss": 0.5604, + "step": 10172 + }, + { + "epoch": 0.72, + "grad_norm": 1.6703632166160867, + "learning_rate": 1.8947594527525654e-06, + "loss": 0.4268, + "step": 10173 + }, + { + "epoch": 0.72, + "grad_norm": 1.772341724813742, + "learning_rate": 1.8938588493717953e-06, + "loss": 0.5079, + "step": 10174 + }, + { + "epoch": 0.72, + "grad_norm": 1.7660365394608772, + "learning_rate": 1.8929584100679255e-06, + "loss": 0.5613, + "step": 10175 + }, + { + "epoch": 0.72, + "grad_norm": 1.5183539052199289, + "learning_rate": 1.8920581348885226e-06, + "loss": 0.5226, + "step": 10176 + }, + { + "epoch": 0.72, + "grad_norm": 1.6906848002335533, + "learning_rate": 1.8911580238811378e-06, + "loss": 0.4669, + "step": 10177 + }, + { + "epoch": 0.72, + "grad_norm": 2.0860454977897884, + "learning_rate": 1.890258077093321e-06, + "loss": 0.496, + "step": 10178 + }, + { + "epoch": 0.72, + "grad_norm": 3.753939221084808, + "learning_rate": 1.8893582945726097e-06, + "loss": 0.4884, + "step": 10179 + }, + { + "epoch": 0.72, + "grad_norm": 1.5966760956055974, + "learning_rate": 1.8884586763665336e-06, + "loss": 0.4948, + "step": 10180 + }, + { + "epoch": 0.72, + "grad_norm": 1.8830847208798305, + "learning_rate": 1.8875592225226153e-06, + "loss": 0.5438, + "step": 10181 + }, + { + "epoch": 0.72, + "grad_norm": 1.7636523561812776, + "learning_rate": 1.8866599330883617e-06, + "loss": 0.4776, + "step": 10182 + }, + { + "epoch": 0.72, + "grad_norm": 0.6923793345269317, + "learning_rate": 1.8857608081112833e-06, + "loss": 0.4424, + "step": 10183 + }, + { + "epoch": 0.72, + "grad_norm": 2.7571738390679097, + "learning_rate": 1.8848618476388697e-06, + "loss": 0.5278, + "step": 10184 + }, + { + "epoch": 0.72, + "grad_norm": 1.7600253174851084, + "learning_rate": 1.8839630517186086e-06, + "loss": 0.4623, + "step": 10185 + }, + { + "epoch": 0.72, + "grad_norm": 1.5173448138694987, + "learning_rate": 1.8830644203979781e-06, + "loss": 0.4749, + "step": 10186 + }, + { + "epoch": 0.72, + "grad_norm": 1.6719521399746966, + "learning_rate": 1.882165953724447e-06, + "loss": 0.4986, + "step": 10187 + }, + { + "epoch": 0.72, + "grad_norm": 1.6321891085482587, + "learning_rate": 1.8812676517454758e-06, + "loss": 0.5266, + "step": 10188 + }, + { + "epoch": 0.72, + "grad_norm": 1.5852823077815796, + "learning_rate": 1.8803695145085116e-06, + "loss": 0.5022, + "step": 10189 + }, + { + "epoch": 0.72, + "grad_norm": 0.7039969038602375, + "learning_rate": 1.8794715420610038e-06, + "loss": 0.4409, + "step": 10190 + }, + { + "epoch": 0.72, + "grad_norm": 1.8735150954911302, + "learning_rate": 1.8785737344503817e-06, + "loss": 0.5337, + "step": 10191 + }, + { + "epoch": 0.72, + "grad_norm": 2.1824007723051673, + "learning_rate": 1.8776760917240715e-06, + "loss": 0.5303, + "step": 10192 + }, + { + "epoch": 0.72, + "grad_norm": 1.7375535966139435, + "learning_rate": 1.8767786139294903e-06, + "loss": 0.5049, + "step": 10193 + }, + { + "epoch": 0.72, + "grad_norm": 1.928853201034043, + "learning_rate": 1.8758813011140447e-06, + "loss": 0.4255, + "step": 10194 + }, + { + "epoch": 0.72, + "grad_norm": 1.7028719099117662, + "learning_rate": 1.8749841533251373e-06, + "loss": 0.5261, + "step": 10195 + }, + { + "epoch": 0.72, + "grad_norm": 1.7970616310074443, + "learning_rate": 1.8740871706101543e-06, + "loss": 0.5076, + "step": 10196 + }, + { + "epoch": 0.72, + "grad_norm": 1.6478847649353774, + "learning_rate": 1.8731903530164786e-06, + "loss": 0.4643, + "step": 10197 + }, + { + "epoch": 0.72, + "grad_norm": 0.7087381307321095, + "learning_rate": 1.8722937005914838e-06, + "loss": 0.4155, + "step": 10198 + }, + { + "epoch": 0.72, + "grad_norm": 1.8160531458745552, + "learning_rate": 1.8713972133825331e-06, + "loss": 0.5576, + "step": 10199 + }, + { + "epoch": 0.72, + "grad_norm": 1.530259897680395, + "learning_rate": 1.8705008914369854e-06, + "loss": 0.5198, + "step": 10200 + }, + { + "epoch": 0.72, + "grad_norm": 0.7535356684844365, + "learning_rate": 1.8696047348021807e-06, + "loss": 0.438, + "step": 10201 + }, + { + "epoch": 0.72, + "grad_norm": 1.782971284607118, + "learning_rate": 1.8687087435254652e-06, + "loss": 0.4662, + "step": 10202 + }, + { + "epoch": 0.72, + "grad_norm": 1.918259259524247, + "learning_rate": 1.8678129176541622e-06, + "loss": 0.4645, + "step": 10203 + }, + { + "epoch": 0.72, + "grad_norm": 2.647501647075869, + "learning_rate": 1.8669172572355953e-06, + "loss": 0.4723, + "step": 10204 + }, + { + "epoch": 0.72, + "grad_norm": 1.6544055298911489, + "learning_rate": 1.8660217623170723e-06, + "loss": 0.5002, + "step": 10205 + }, + { + "epoch": 0.72, + "grad_norm": 2.5756080001696935, + "learning_rate": 1.865126432945903e-06, + "loss": 0.6528, + "step": 10206 + }, + { + "epoch": 0.72, + "grad_norm": 1.8215094474652893, + "learning_rate": 1.8642312691693754e-06, + "loss": 0.5518, + "step": 10207 + }, + { + "epoch": 0.72, + "grad_norm": 2.0092817267570338, + "learning_rate": 1.8633362710347764e-06, + "loss": 0.543, + "step": 10208 + }, + { + "epoch": 0.72, + "grad_norm": 1.6965626794015187, + "learning_rate": 1.8624414385893875e-06, + "loss": 0.4888, + "step": 10209 + }, + { + "epoch": 0.72, + "grad_norm": 1.6911713222135047, + "learning_rate": 1.8615467718804713e-06, + "loss": 0.4897, + "step": 10210 + }, + { + "epoch": 0.72, + "grad_norm": 1.7279048872893696, + "learning_rate": 1.860652270955291e-06, + "loss": 0.477, + "step": 10211 + }, + { + "epoch": 0.72, + "grad_norm": 1.6957088777858205, + "learning_rate": 1.8597579358610917e-06, + "loss": 0.4674, + "step": 10212 + }, + { + "epoch": 0.72, + "grad_norm": 2.0400924923405483, + "learning_rate": 1.8588637666451225e-06, + "loss": 0.4666, + "step": 10213 + }, + { + "epoch": 0.72, + "grad_norm": 1.6701781750279638, + "learning_rate": 1.8579697633546107e-06, + "loss": 0.5524, + "step": 10214 + }, + { + "epoch": 0.72, + "grad_norm": 1.6782171160790955, + "learning_rate": 1.8570759260367831e-06, + "loss": 0.4846, + "step": 10215 + }, + { + "epoch": 0.72, + "grad_norm": 1.641324253578939, + "learning_rate": 1.8561822547388547e-06, + "loss": 0.5355, + "step": 10216 + }, + { + "epoch": 0.73, + "grad_norm": 1.6780355917771068, + "learning_rate": 1.855288749508032e-06, + "loss": 0.4886, + "step": 10217 + }, + { + "epoch": 0.73, + "grad_norm": 1.9223914458998919, + "learning_rate": 1.8543954103915152e-06, + "loss": 0.5163, + "step": 10218 + }, + { + "epoch": 0.73, + "grad_norm": 0.7662077303941629, + "learning_rate": 1.8535022374364897e-06, + "loss": 0.4522, + "step": 10219 + }, + { + "epoch": 0.73, + "grad_norm": 1.632915480745482, + "learning_rate": 1.8526092306901384e-06, + "loss": 0.5324, + "step": 10220 + }, + { + "epoch": 0.73, + "grad_norm": 1.5904080470879762, + "learning_rate": 1.8517163901996316e-06, + "loss": 0.5291, + "step": 10221 + }, + { + "epoch": 0.73, + "grad_norm": 1.505433322125966, + "learning_rate": 1.8508237160121333e-06, + "loss": 0.5818, + "step": 10222 + }, + { + "epoch": 0.73, + "grad_norm": 1.9509554935377402, + "learning_rate": 1.8499312081747973e-06, + "loss": 0.5398, + "step": 10223 + }, + { + "epoch": 0.73, + "grad_norm": 3.1725972677284298, + "learning_rate": 1.8490388667347686e-06, + "loss": 0.5469, + "step": 10224 + }, + { + "epoch": 0.73, + "grad_norm": 2.145164727864108, + "learning_rate": 1.8481466917391855e-06, + "loss": 0.5475, + "step": 10225 + }, + { + "epoch": 0.73, + "grad_norm": 1.6688874584695186, + "learning_rate": 1.8472546832351723e-06, + "loss": 0.5413, + "step": 10226 + }, + { + "epoch": 0.73, + "grad_norm": 2.1532841920741874, + "learning_rate": 1.8463628412698497e-06, + "loss": 0.5709, + "step": 10227 + }, + { + "epoch": 0.73, + "grad_norm": 1.614321563182535, + "learning_rate": 1.8454711658903278e-06, + "loss": 0.5051, + "step": 10228 + }, + { + "epoch": 0.73, + "grad_norm": 2.8066266816649494, + "learning_rate": 1.844579657143708e-06, + "loss": 0.555, + "step": 10229 + }, + { + "epoch": 0.73, + "grad_norm": 1.711868293931153, + "learning_rate": 1.8436883150770845e-06, + "loss": 0.5426, + "step": 10230 + }, + { + "epoch": 0.73, + "grad_norm": 1.7455495395677578, + "learning_rate": 1.842797139737536e-06, + "loss": 0.5539, + "step": 10231 + }, + { + "epoch": 0.73, + "grad_norm": 1.7737050188503225, + "learning_rate": 1.8419061311721442e-06, + "loss": 0.5084, + "step": 10232 + }, + { + "epoch": 0.73, + "grad_norm": 1.4853996692594096, + "learning_rate": 1.8410152894279692e-06, + "loss": 0.4844, + "step": 10233 + }, + { + "epoch": 0.73, + "grad_norm": 1.8765078572698706, + "learning_rate": 1.8401246145520729e-06, + "loss": 0.5744, + "step": 10234 + }, + { + "epoch": 0.73, + "grad_norm": 1.673044804276567, + "learning_rate": 1.839234106591498e-06, + "loss": 0.5118, + "step": 10235 + }, + { + "epoch": 0.73, + "grad_norm": 1.636426400643627, + "learning_rate": 1.8383437655932895e-06, + "loss": 0.4893, + "step": 10236 + }, + { + "epoch": 0.73, + "grad_norm": 1.6853707709969132, + "learning_rate": 1.8374535916044784e-06, + "loss": 0.5223, + "step": 10237 + }, + { + "epoch": 0.73, + "grad_norm": 1.7179470221751045, + "learning_rate": 1.8365635846720814e-06, + "loss": 0.5523, + "step": 10238 + }, + { + "epoch": 0.73, + "grad_norm": 1.7728143434078685, + "learning_rate": 1.8356737448431179e-06, + "loss": 0.555, + "step": 10239 + }, + { + "epoch": 0.73, + "grad_norm": 1.5278036823091092, + "learning_rate": 1.8347840721645883e-06, + "loss": 0.5272, + "step": 10240 + }, + { + "epoch": 0.73, + "grad_norm": 2.5251237394598176, + "learning_rate": 1.83389456668349e-06, + "loss": 0.4629, + "step": 10241 + }, + { + "epoch": 0.73, + "grad_norm": 1.5318292832259057, + "learning_rate": 1.8330052284468065e-06, + "loss": 0.5225, + "step": 10242 + }, + { + "epoch": 0.73, + "grad_norm": 0.7146175948064835, + "learning_rate": 1.8321160575015211e-06, + "loss": 0.4322, + "step": 10243 + }, + { + "epoch": 0.73, + "grad_norm": 1.7050205935910363, + "learning_rate": 1.831227053894598e-06, + "loss": 0.5365, + "step": 10244 + }, + { + "epoch": 0.73, + "grad_norm": 2.225599415910144, + "learning_rate": 1.8303382176729996e-06, + "loss": 0.5255, + "step": 10245 + }, + { + "epoch": 0.73, + "grad_norm": 1.54064879292326, + "learning_rate": 1.8294495488836762e-06, + "loss": 0.4579, + "step": 10246 + }, + { + "epoch": 0.73, + "grad_norm": 2.187580777844201, + "learning_rate": 1.828561047573571e-06, + "loss": 0.4447, + "step": 10247 + }, + { + "epoch": 0.73, + "grad_norm": 2.0429011825702035, + "learning_rate": 1.8276727137896199e-06, + "loss": 0.575, + "step": 10248 + }, + { + "epoch": 0.73, + "grad_norm": 1.6149149611328548, + "learning_rate": 1.8267845475787433e-06, + "loss": 0.4826, + "step": 10249 + }, + { + "epoch": 0.73, + "grad_norm": 1.6001103620602708, + "learning_rate": 1.8258965489878589e-06, + "loss": 0.5286, + "step": 10250 + }, + { + "epoch": 0.73, + "grad_norm": 1.8266797780773252, + "learning_rate": 1.8250087180638743e-06, + "loss": 0.4873, + "step": 10251 + }, + { + "epoch": 0.73, + "grad_norm": 1.8035756083423333, + "learning_rate": 1.824121054853688e-06, + "loss": 0.5375, + "step": 10252 + }, + { + "epoch": 0.73, + "grad_norm": 3.6608137565991514, + "learning_rate": 1.82323355940419e-06, + "loss": 0.5946, + "step": 10253 + }, + { + "epoch": 0.73, + "grad_norm": 1.6381398334487136, + "learning_rate": 1.822346231762257e-06, + "loss": 0.5343, + "step": 10254 + }, + { + "epoch": 0.73, + "grad_norm": 1.6484521308403561, + "learning_rate": 1.821459071974766e-06, + "loss": 0.4966, + "step": 10255 + }, + { + "epoch": 0.73, + "grad_norm": 2.0326007990692947, + "learning_rate": 1.8205720800885763e-06, + "loss": 0.5029, + "step": 10256 + }, + { + "epoch": 0.73, + "grad_norm": 1.4437674345309566, + "learning_rate": 1.819685256150542e-06, + "loss": 0.5124, + "step": 10257 + }, + { + "epoch": 0.73, + "grad_norm": 1.6517247547535743, + "learning_rate": 1.8187986002075092e-06, + "loss": 0.4898, + "step": 10258 + }, + { + "epoch": 0.73, + "grad_norm": 0.7204775791678775, + "learning_rate": 1.8179121123063137e-06, + "loss": 0.4524, + "step": 10259 + }, + { + "epoch": 0.73, + "grad_norm": 1.8476266816181046, + "learning_rate": 1.8170257924937845e-06, + "loss": 0.4862, + "step": 10260 + }, + { + "epoch": 0.73, + "grad_norm": 2.001977459882083, + "learning_rate": 1.8161396408167349e-06, + "loss": 0.5897, + "step": 10261 + }, + { + "epoch": 0.73, + "grad_norm": 1.8715315799430345, + "learning_rate": 1.8152536573219815e-06, + "loss": 0.6164, + "step": 10262 + }, + { + "epoch": 0.73, + "grad_norm": 2.8713777847710897, + "learning_rate": 1.8143678420563194e-06, + "loss": 0.5278, + "step": 10263 + }, + { + "epoch": 0.73, + "grad_norm": 1.5791041144065054, + "learning_rate": 1.813482195066542e-06, + "loss": 0.5177, + "step": 10264 + }, + { + "epoch": 0.73, + "grad_norm": 2.4436716648595866, + "learning_rate": 1.812596716399433e-06, + "loss": 0.5287, + "step": 10265 + }, + { + "epoch": 0.73, + "grad_norm": 2.1032978296268943, + "learning_rate": 1.8117114061017655e-06, + "loss": 0.5081, + "step": 10266 + }, + { + "epoch": 0.73, + "grad_norm": 2.331970753087138, + "learning_rate": 1.8108262642203068e-06, + "loss": 0.5158, + "step": 10267 + }, + { + "epoch": 0.73, + "grad_norm": 2.4750113229756296, + "learning_rate": 1.8099412908018093e-06, + "loss": 0.5869, + "step": 10268 + }, + { + "epoch": 0.73, + "grad_norm": 4.53333963819753, + "learning_rate": 1.8090564858930222e-06, + "loss": 0.5431, + "step": 10269 + }, + { + "epoch": 0.73, + "grad_norm": 1.7181905402845163, + "learning_rate": 1.8081718495406842e-06, + "loss": 0.5055, + "step": 10270 + }, + { + "epoch": 0.73, + "grad_norm": 1.5873132535531636, + "learning_rate": 1.807287381791526e-06, + "loss": 0.4958, + "step": 10271 + }, + { + "epoch": 0.73, + "grad_norm": 1.514184129184166, + "learning_rate": 1.806403082692263e-06, + "loss": 0.427, + "step": 10272 + }, + { + "epoch": 0.73, + "grad_norm": 1.7632200982812913, + "learning_rate": 1.8055189522896144e-06, + "loss": 0.5692, + "step": 10273 + }, + { + "epoch": 0.73, + "grad_norm": 0.6859453577184376, + "learning_rate": 1.8046349906302774e-06, + "loss": 0.4266, + "step": 10274 + }, + { + "epoch": 0.73, + "grad_norm": 2.5753104256284596, + "learning_rate": 1.8037511977609474e-06, + "loss": 0.5675, + "step": 10275 + }, + { + "epoch": 0.73, + "grad_norm": 2.0032313867336136, + "learning_rate": 1.8028675737283098e-06, + "loss": 0.5419, + "step": 10276 + }, + { + "epoch": 0.73, + "grad_norm": 0.6744645711328259, + "learning_rate": 1.8019841185790398e-06, + "loss": 0.4324, + "step": 10277 + }, + { + "epoch": 0.73, + "grad_norm": 0.6676238218487873, + "learning_rate": 1.8011008323598067e-06, + "loss": 0.4053, + "step": 10278 + }, + { + "epoch": 0.73, + "grad_norm": 2.423500139257181, + "learning_rate": 1.8002177151172657e-06, + "loss": 0.4651, + "step": 10279 + }, + { + "epoch": 0.73, + "grad_norm": 0.6858536381620776, + "learning_rate": 1.799334766898067e-06, + "loss": 0.3959, + "step": 10280 + }, + { + "epoch": 0.73, + "grad_norm": 5.038373956546306, + "learning_rate": 1.7984519877488515e-06, + "loss": 0.473, + "step": 10281 + }, + { + "epoch": 0.73, + "grad_norm": 1.687170789860624, + "learning_rate": 1.7975693777162506e-06, + "loss": 0.4883, + "step": 10282 + }, + { + "epoch": 0.73, + "grad_norm": 1.5408224808190714, + "learning_rate": 1.7966869368468876e-06, + "loss": 0.5773, + "step": 10283 + }, + { + "epoch": 0.73, + "grad_norm": 1.5943652518338445, + "learning_rate": 1.7958046651873716e-06, + "loss": 0.54, + "step": 10284 + }, + { + "epoch": 0.73, + "grad_norm": 1.5365052918813296, + "learning_rate": 1.7949225627843142e-06, + "loss": 0.406, + "step": 10285 + }, + { + "epoch": 0.73, + "grad_norm": 2.2252852534713883, + "learning_rate": 1.7940406296843054e-06, + "loss": 0.6191, + "step": 10286 + }, + { + "epoch": 0.73, + "grad_norm": 2.10556325734974, + "learning_rate": 1.7931588659339339e-06, + "loss": 0.5534, + "step": 10287 + }, + { + "epoch": 0.73, + "grad_norm": 1.5842475643812837, + "learning_rate": 1.7922772715797775e-06, + "loss": 0.4965, + "step": 10288 + }, + { + "epoch": 0.73, + "grad_norm": 2.3075002023145132, + "learning_rate": 1.7913958466684046e-06, + "loss": 0.4635, + "step": 10289 + }, + { + "epoch": 0.73, + "grad_norm": 1.5653405101513966, + "learning_rate": 1.7905145912463773e-06, + "loss": 0.5327, + "step": 10290 + }, + { + "epoch": 0.73, + "grad_norm": 1.5237925113232005, + "learning_rate": 1.7896335053602409e-06, + "loss": 0.4682, + "step": 10291 + }, + { + "epoch": 0.73, + "grad_norm": 1.8318671220515856, + "learning_rate": 1.7887525890565443e-06, + "loss": 0.543, + "step": 10292 + }, + { + "epoch": 0.73, + "grad_norm": 1.9930256077007082, + "learning_rate": 1.787871842381816e-06, + "loss": 0.5533, + "step": 10293 + }, + { + "epoch": 0.73, + "grad_norm": 1.7596420698745947, + "learning_rate": 1.7869912653825804e-06, + "loss": 0.5685, + "step": 10294 + }, + { + "epoch": 0.73, + "grad_norm": 2.11990274752441, + "learning_rate": 1.7861108581053533e-06, + "loss": 0.5275, + "step": 10295 + }, + { + "epoch": 0.73, + "grad_norm": 1.421362756674344, + "learning_rate": 1.7852306205966413e-06, + "loss": 0.4029, + "step": 10296 + }, + { + "epoch": 0.73, + "grad_norm": 3.4398351761731654, + "learning_rate": 1.7843505529029426e-06, + "loss": 0.5464, + "step": 10297 + }, + { + "epoch": 0.73, + "grad_norm": 1.5991478915852824, + "learning_rate": 1.783470655070742e-06, + "loss": 0.5091, + "step": 10298 + }, + { + "epoch": 0.73, + "grad_norm": 1.6640874573350437, + "learning_rate": 1.782590927146521e-06, + "loss": 0.5076, + "step": 10299 + }, + { + "epoch": 0.73, + "grad_norm": 0.6896046778372105, + "learning_rate": 1.7817113691767485e-06, + "loss": 0.4404, + "step": 10300 + }, + { + "epoch": 0.73, + "grad_norm": 1.9633955958098148, + "learning_rate": 1.7808319812078884e-06, + "loss": 0.524, + "step": 10301 + }, + { + "epoch": 0.73, + "grad_norm": 2.1184092690481817, + "learning_rate": 1.7799527632863877e-06, + "loss": 0.5026, + "step": 10302 + }, + { + "epoch": 0.73, + "grad_norm": 1.6350990899119886, + "learning_rate": 1.7790737154586951e-06, + "loss": 0.4898, + "step": 10303 + }, + { + "epoch": 0.73, + "grad_norm": 1.471295395073119, + "learning_rate": 1.7781948377712444e-06, + "loss": 0.5702, + "step": 10304 + }, + { + "epoch": 0.73, + "grad_norm": 1.6354896351665964, + "learning_rate": 1.7773161302704578e-06, + "loss": 0.5923, + "step": 10305 + }, + { + "epoch": 0.73, + "grad_norm": 2.0062452698505227, + "learning_rate": 1.7764375930027528e-06, + "loss": 0.5645, + "step": 10306 + }, + { + "epoch": 0.73, + "grad_norm": 1.5824567447183244, + "learning_rate": 1.775559226014537e-06, + "loss": 0.5498, + "step": 10307 + }, + { + "epoch": 0.73, + "grad_norm": 1.8110817153523036, + "learning_rate": 1.774681029352211e-06, + "loss": 0.4755, + "step": 10308 + }, + { + "epoch": 0.73, + "grad_norm": 0.6593025218220048, + "learning_rate": 1.77380300306216e-06, + "loss": 0.4126, + "step": 10309 + }, + { + "epoch": 0.73, + "grad_norm": 2.0152794191454837, + "learning_rate": 1.772925147190766e-06, + "loss": 0.5973, + "step": 10310 + }, + { + "epoch": 0.73, + "grad_norm": 2.597243670649812, + "learning_rate": 1.7720474617844008e-06, + "loss": 0.4862, + "step": 10311 + }, + { + "epoch": 0.73, + "grad_norm": 2.5748548100654904, + "learning_rate": 1.7711699468894262e-06, + "loss": 0.5564, + "step": 10312 + }, + { + "epoch": 0.73, + "grad_norm": 1.8729983966265478, + "learning_rate": 1.7702926025521972e-06, + "loss": 0.5151, + "step": 10313 + }, + { + "epoch": 0.73, + "grad_norm": 1.6947000122007196, + "learning_rate": 1.769415428819054e-06, + "loss": 0.4847, + "step": 10314 + }, + { + "epoch": 0.73, + "grad_norm": 1.6161940049854904, + "learning_rate": 1.7685384257363374e-06, + "loss": 0.4457, + "step": 10315 + }, + { + "epoch": 0.73, + "grad_norm": 1.5841886546252433, + "learning_rate": 1.7676615933503694e-06, + "loss": 0.5959, + "step": 10316 + }, + { + "epoch": 0.73, + "grad_norm": 1.6243431532036179, + "learning_rate": 1.7667849317074686e-06, + "loss": 0.488, + "step": 10317 + }, + { + "epoch": 0.73, + "grad_norm": 1.7400271755768821, + "learning_rate": 1.7659084408539435e-06, + "loss": 0.4951, + "step": 10318 + }, + { + "epoch": 0.73, + "grad_norm": 1.6498859482199957, + "learning_rate": 1.7650321208360932e-06, + "loss": 0.5478, + "step": 10319 + }, + { + "epoch": 0.73, + "grad_norm": 1.5066752768022127, + "learning_rate": 1.7641559717002094e-06, + "loss": 0.5101, + "step": 10320 + }, + { + "epoch": 0.73, + "grad_norm": 2.397551992860907, + "learning_rate": 1.7632799934925682e-06, + "loss": 0.498, + "step": 10321 + }, + { + "epoch": 0.73, + "grad_norm": 2.065950663636038, + "learning_rate": 1.7624041862594487e-06, + "loss": 0.5418, + "step": 10322 + }, + { + "epoch": 0.73, + "grad_norm": 1.5808678758814272, + "learning_rate": 1.761528550047109e-06, + "loss": 0.5192, + "step": 10323 + }, + { + "epoch": 0.73, + "grad_norm": 1.641899930610851, + "learning_rate": 1.7606530849018044e-06, + "loss": 0.5385, + "step": 10324 + }, + { + "epoch": 0.73, + "grad_norm": 1.821812471995573, + "learning_rate": 1.7597777908697804e-06, + "loss": 0.5493, + "step": 10325 + }, + { + "epoch": 0.73, + "grad_norm": 1.7093218858287975, + "learning_rate": 1.7589026679972731e-06, + "loss": 0.4929, + "step": 10326 + }, + { + "epoch": 0.73, + "grad_norm": 1.476475243649168, + "learning_rate": 1.7580277163305109e-06, + "loss": 0.4392, + "step": 10327 + }, + { + "epoch": 0.73, + "grad_norm": 1.5308765696377187, + "learning_rate": 1.757152935915708e-06, + "loss": 0.4851, + "step": 10328 + }, + { + "epoch": 0.73, + "grad_norm": 1.5575976191337766, + "learning_rate": 1.7562783267990758e-06, + "loss": 0.5679, + "step": 10329 + }, + { + "epoch": 0.73, + "grad_norm": 1.7047632152777175, + "learning_rate": 1.7554038890268132e-06, + "loss": 0.5259, + "step": 10330 + }, + { + "epoch": 0.73, + "grad_norm": 1.6062119872948746, + "learning_rate": 1.7545296226451115e-06, + "loss": 0.5229, + "step": 10331 + }, + { + "epoch": 0.73, + "grad_norm": 0.687552023179318, + "learning_rate": 1.7536555277001538e-06, + "loss": 0.4, + "step": 10332 + }, + { + "epoch": 0.73, + "grad_norm": 1.5527546140474207, + "learning_rate": 1.752781604238108e-06, + "loss": 0.4962, + "step": 10333 + }, + { + "epoch": 0.73, + "grad_norm": 1.878967841047379, + "learning_rate": 1.751907852305144e-06, + "loss": 0.5168, + "step": 10334 + }, + { + "epoch": 0.73, + "grad_norm": 1.6729464416993773, + "learning_rate": 1.7510342719474122e-06, + "loss": 0.5289, + "step": 10335 + }, + { + "epoch": 0.73, + "grad_norm": 1.6134296145143128, + "learning_rate": 1.750160863211059e-06, + "loss": 0.4738, + "step": 10336 + }, + { + "epoch": 0.73, + "grad_norm": 1.533127672403869, + "learning_rate": 1.749287626142221e-06, + "loss": 0.5538, + "step": 10337 + }, + { + "epoch": 0.73, + "grad_norm": 1.6399703884410466, + "learning_rate": 1.7484145607870267e-06, + "loss": 0.4824, + "step": 10338 + }, + { + "epoch": 0.73, + "grad_norm": 1.5784383598902836, + "learning_rate": 1.7475416671915917e-06, + "loss": 0.4932, + "step": 10339 + }, + { + "epoch": 0.73, + "grad_norm": 1.4921738258238617, + "learning_rate": 1.7466689454020252e-06, + "loss": 0.5707, + "step": 10340 + }, + { + "epoch": 0.73, + "grad_norm": 1.7844735175746989, + "learning_rate": 1.7457963954644324e-06, + "loss": 0.5097, + "step": 10341 + }, + { + "epoch": 0.73, + "grad_norm": 1.9697191460906485, + "learning_rate": 1.7449240174248988e-06, + "loss": 0.4883, + "step": 10342 + }, + { + "epoch": 0.73, + "grad_norm": 1.9585912796546394, + "learning_rate": 1.7440518113295095e-06, + "loss": 0.518, + "step": 10343 + }, + { + "epoch": 0.73, + "grad_norm": 2.1492245220407944, + "learning_rate": 1.7431797772243336e-06, + "loss": 0.5547, + "step": 10344 + }, + { + "epoch": 0.73, + "grad_norm": 1.8535980764213418, + "learning_rate": 1.7423079151554401e-06, + "loss": 0.5817, + "step": 10345 + }, + { + "epoch": 0.73, + "grad_norm": 1.5944258898918746, + "learning_rate": 1.74143622516888e-06, + "loss": 0.5733, + "step": 10346 + }, + { + "epoch": 0.73, + "grad_norm": 1.6640121644785961, + "learning_rate": 1.7405647073106996e-06, + "loss": 0.5212, + "step": 10347 + }, + { + "epoch": 0.73, + "grad_norm": 1.8652546485379602, + "learning_rate": 1.7396933616269363e-06, + "loss": 0.4983, + "step": 10348 + }, + { + "epoch": 0.73, + "grad_norm": 1.6540322731858663, + "learning_rate": 1.7388221881636163e-06, + "loss": 0.5958, + "step": 10349 + }, + { + "epoch": 0.73, + "grad_norm": 1.6088697712529059, + "learning_rate": 1.7379511869667616e-06, + "loss": 0.5079, + "step": 10350 + }, + { + "epoch": 0.73, + "grad_norm": 0.6548238733295465, + "learning_rate": 1.7370803580823741e-06, + "loss": 0.4236, + "step": 10351 + }, + { + "epoch": 0.73, + "grad_norm": 1.9759411167808332, + "learning_rate": 1.7362097015564622e-06, + "loss": 0.4925, + "step": 10352 + }, + { + "epoch": 0.73, + "grad_norm": 3.3134523705439682, + "learning_rate": 1.735339217435011e-06, + "loss": 0.5737, + "step": 10353 + }, + { + "epoch": 0.73, + "grad_norm": 0.7263337916184869, + "learning_rate": 1.7344689057640047e-06, + "loss": 0.4055, + "step": 10354 + }, + { + "epoch": 0.73, + "grad_norm": 1.6632690589477614, + "learning_rate": 1.7335987665894161e-06, + "loss": 0.4731, + "step": 10355 + }, + { + "epoch": 0.73, + "grad_norm": 1.8483688818726582, + "learning_rate": 1.732728799957209e-06, + "loss": 0.5321, + "step": 10356 + }, + { + "epoch": 0.73, + "grad_norm": 1.873937351044728, + "learning_rate": 1.7318590059133388e-06, + "loss": 0.4842, + "step": 10357 + }, + { + "epoch": 0.74, + "grad_norm": 1.7600011802961772, + "learning_rate": 1.7309893845037483e-06, + "loss": 0.5457, + "step": 10358 + }, + { + "epoch": 0.74, + "grad_norm": 2.173819979530444, + "learning_rate": 1.730119935774376e-06, + "loss": 0.6189, + "step": 10359 + }, + { + "epoch": 0.74, + "grad_norm": 1.6560097824555855, + "learning_rate": 1.7292506597711479e-06, + "loss": 0.4676, + "step": 10360 + }, + { + "epoch": 0.74, + "grad_norm": 1.4438926081279408, + "learning_rate": 1.7283815565399831e-06, + "loss": 0.4985, + "step": 10361 + }, + { + "epoch": 0.74, + "grad_norm": 1.634944471107545, + "learning_rate": 1.7275126261267916e-06, + "loss": 0.4585, + "step": 10362 + }, + { + "epoch": 0.74, + "grad_norm": 1.672401088751102, + "learning_rate": 1.726643868577469e-06, + "loss": 0.4938, + "step": 10363 + }, + { + "epoch": 0.74, + "grad_norm": 1.7312622590562106, + "learning_rate": 1.7257752839379116e-06, + "loss": 0.5491, + "step": 10364 + }, + { + "epoch": 0.74, + "grad_norm": 3.573520631994281, + "learning_rate": 1.7249068722539974e-06, + "loss": 0.5296, + "step": 10365 + }, + { + "epoch": 0.74, + "grad_norm": 1.6206713183616401, + "learning_rate": 1.7240386335715992e-06, + "loss": 0.5113, + "step": 10366 + }, + { + "epoch": 0.74, + "grad_norm": 1.6744626421913837, + "learning_rate": 1.7231705679365812e-06, + "loss": 0.5189, + "step": 10367 + }, + { + "epoch": 0.74, + "grad_norm": 1.6804936812649143, + "learning_rate": 1.7223026753947968e-06, + "loss": 0.5277, + "step": 10368 + }, + { + "epoch": 0.74, + "grad_norm": 1.6682704988388106, + "learning_rate": 1.7214349559920935e-06, + "loss": 0.5739, + "step": 10369 + }, + { + "epoch": 0.74, + "grad_norm": 1.8590285278297647, + "learning_rate": 1.7205674097743013e-06, + "loss": 0.4646, + "step": 10370 + }, + { + "epoch": 0.74, + "grad_norm": 1.7654059720660669, + "learning_rate": 1.7197000367872546e-06, + "loss": 0.4876, + "step": 10371 + }, + { + "epoch": 0.74, + "grad_norm": 1.803284097360823, + "learning_rate": 1.7188328370767655e-06, + "loss": 0.57, + "step": 10372 + }, + { + "epoch": 0.74, + "grad_norm": 0.8036707048224655, + "learning_rate": 1.7179658106886454e-06, + "loss": 0.4269, + "step": 10373 + }, + { + "epoch": 0.74, + "grad_norm": 1.8502739283553713, + "learning_rate": 1.7170989576686886e-06, + "loss": 0.475, + "step": 10374 + }, + { + "epoch": 0.74, + "grad_norm": 1.7159013696702918, + "learning_rate": 1.7162322780626927e-06, + "loss": 0.4873, + "step": 10375 + }, + { + "epoch": 0.74, + "grad_norm": 1.6739567271397748, + "learning_rate": 1.7153657719164328e-06, + "loss": 0.5367, + "step": 10376 + }, + { + "epoch": 0.74, + "grad_norm": 1.864675244696054, + "learning_rate": 1.714499439275683e-06, + "loss": 0.472, + "step": 10377 + }, + { + "epoch": 0.74, + "grad_norm": 2.6730129367537763, + "learning_rate": 1.7136332801862054e-06, + "loss": 0.5418, + "step": 10378 + }, + { + "epoch": 0.74, + "grad_norm": 1.985802702866608, + "learning_rate": 1.712767294693754e-06, + "loss": 0.574, + "step": 10379 + }, + { + "epoch": 0.74, + "grad_norm": 1.6182077241835342, + "learning_rate": 1.7119014828440738e-06, + "loss": 0.4954, + "step": 10380 + }, + { + "epoch": 0.74, + "grad_norm": 1.742991375490611, + "learning_rate": 1.7110358446828979e-06, + "loss": 0.5448, + "step": 10381 + }, + { + "epoch": 0.74, + "grad_norm": 1.7058014552596428, + "learning_rate": 1.7101703802559527e-06, + "loss": 0.4955, + "step": 10382 + }, + { + "epoch": 0.74, + "grad_norm": 0.658482061262983, + "learning_rate": 1.7093050896089557e-06, + "loss": 0.43, + "step": 10383 + }, + { + "epoch": 0.74, + "grad_norm": 1.6178108292214874, + "learning_rate": 1.7084399727876144e-06, + "loss": 0.5457, + "step": 10384 + }, + { + "epoch": 0.74, + "grad_norm": 2.1970929159463317, + "learning_rate": 1.707575029837627e-06, + "loss": 0.5455, + "step": 10385 + }, + { + "epoch": 0.74, + "grad_norm": 1.809179981089818, + "learning_rate": 1.7067102608046826e-06, + "loss": 0.572, + "step": 10386 + }, + { + "epoch": 0.74, + "grad_norm": 1.7271939999098316, + "learning_rate": 1.7058456657344629e-06, + "loss": 0.4448, + "step": 10387 + }, + { + "epoch": 0.74, + "grad_norm": 2.1632508641204566, + "learning_rate": 1.704981244672636e-06, + "loss": 0.4833, + "step": 10388 + }, + { + "epoch": 0.74, + "grad_norm": 1.8450837209214086, + "learning_rate": 1.7041169976648648e-06, + "loss": 0.598, + "step": 10389 + }, + { + "epoch": 0.74, + "grad_norm": 1.8983380378202808, + "learning_rate": 1.7032529247568019e-06, + "loss": 0.48, + "step": 10390 + }, + { + "epoch": 0.74, + "grad_norm": 1.8499653379713006, + "learning_rate": 1.70238902599409e-06, + "loss": 0.5128, + "step": 10391 + }, + { + "epoch": 0.74, + "grad_norm": 1.670900701875766, + "learning_rate": 1.7015253014223655e-06, + "loss": 0.5478, + "step": 10392 + }, + { + "epoch": 0.74, + "grad_norm": 1.8133398193051051, + "learning_rate": 1.7006617510872475e-06, + "loss": 0.4835, + "step": 10393 + }, + { + "epoch": 0.74, + "grad_norm": 1.6032307954882428, + "learning_rate": 1.6997983750343594e-06, + "loss": 0.5424, + "step": 10394 + }, + { + "epoch": 0.74, + "grad_norm": 1.6120602642781467, + "learning_rate": 1.6989351733093019e-06, + "loss": 0.5196, + "step": 10395 + }, + { + "epoch": 0.74, + "grad_norm": 1.932607485570893, + "learning_rate": 1.6980721459576743e-06, + "loss": 0.6054, + "step": 10396 + }, + { + "epoch": 0.74, + "grad_norm": 1.593932553661682, + "learning_rate": 1.6972092930250639e-06, + "loss": 0.543, + "step": 10397 + }, + { + "epoch": 0.74, + "grad_norm": 0.8039147055590061, + "learning_rate": 1.6963466145570507e-06, + "loss": 0.4438, + "step": 10398 + }, + { + "epoch": 0.74, + "grad_norm": 0.6793703957727107, + "learning_rate": 1.6954841105992047e-06, + "loss": 0.4191, + "step": 10399 + }, + { + "epoch": 0.74, + "grad_norm": 2.081196371902629, + "learning_rate": 1.6946217811970823e-06, + "loss": 0.5318, + "step": 10400 + }, + { + "epoch": 0.74, + "grad_norm": 2.2658102642237985, + "learning_rate": 1.6937596263962402e-06, + "loss": 0.4996, + "step": 10401 + }, + { + "epoch": 0.74, + "grad_norm": 1.4400183125041217, + "learning_rate": 1.6928976462422163e-06, + "loss": 0.4304, + "step": 10402 + }, + { + "epoch": 0.74, + "grad_norm": 2.1113126024773203, + "learning_rate": 1.692035840780547e-06, + "loss": 0.5005, + "step": 10403 + }, + { + "epoch": 0.74, + "grad_norm": 1.5698146616707072, + "learning_rate": 1.6911742100567496e-06, + "loss": 0.5054, + "step": 10404 + }, + { + "epoch": 0.74, + "grad_norm": 1.770830893977512, + "learning_rate": 1.6903127541163462e-06, + "loss": 0.5966, + "step": 10405 + }, + { + "epoch": 0.74, + "grad_norm": 1.5257558337666013, + "learning_rate": 1.6894514730048356e-06, + "loss": 0.5549, + "step": 10406 + }, + { + "epoch": 0.74, + "grad_norm": 0.7212878409751853, + "learning_rate": 1.6885903667677155e-06, + "loss": 0.4301, + "step": 10407 + }, + { + "epoch": 0.74, + "grad_norm": 2.2237780482882656, + "learning_rate": 1.6877294354504736e-06, + "loss": 0.5865, + "step": 10408 + }, + { + "epoch": 0.74, + "grad_norm": 2.671086678063606, + "learning_rate": 1.686868679098586e-06, + "loss": 0.5875, + "step": 10409 + }, + { + "epoch": 0.74, + "grad_norm": 1.5840092294401948, + "learning_rate": 1.6860080977575232e-06, + "loss": 0.4682, + "step": 10410 + }, + { + "epoch": 0.74, + "grad_norm": 1.4905424363420188, + "learning_rate": 1.6851476914727406e-06, + "loss": 0.4941, + "step": 10411 + }, + { + "epoch": 0.74, + "grad_norm": 1.584074625439617, + "learning_rate": 1.6842874602896886e-06, + "loss": 0.5169, + "step": 10412 + }, + { + "epoch": 0.74, + "grad_norm": 1.7611079199144852, + "learning_rate": 1.6834274042538079e-06, + "loss": 0.5325, + "step": 10413 + }, + { + "epoch": 0.74, + "grad_norm": 1.6668419782344575, + "learning_rate": 1.6825675234105304e-06, + "loss": 0.5171, + "step": 10414 + }, + { + "epoch": 0.74, + "grad_norm": 1.8164629091306044, + "learning_rate": 1.681707817805277e-06, + "loss": 0.4815, + "step": 10415 + }, + { + "epoch": 0.74, + "grad_norm": 1.6564290836698472, + "learning_rate": 1.6808482874834608e-06, + "loss": 0.5237, + "step": 10416 + }, + { + "epoch": 0.74, + "grad_norm": 1.6441686709304315, + "learning_rate": 1.6799889324904862e-06, + "loss": 0.566, + "step": 10417 + }, + { + "epoch": 0.74, + "grad_norm": 5.024789097625792, + "learning_rate": 1.6791297528717444e-06, + "loss": 0.5072, + "step": 10418 + }, + { + "epoch": 0.74, + "grad_norm": 1.6239723318605777, + "learning_rate": 1.678270748672622e-06, + "loss": 0.552, + "step": 10419 + }, + { + "epoch": 0.74, + "grad_norm": 1.528074809745828, + "learning_rate": 1.6774119199384936e-06, + "loss": 0.4728, + "step": 10420 + }, + { + "epoch": 0.74, + "grad_norm": 2.113207746251324, + "learning_rate": 1.6765532667147267e-06, + "loss": 0.5054, + "step": 10421 + }, + { + "epoch": 0.74, + "grad_norm": 2.0096637856811537, + "learning_rate": 1.675694789046679e-06, + "loss": 0.5712, + "step": 10422 + }, + { + "epoch": 0.74, + "grad_norm": 1.7768621547958665, + "learning_rate": 1.6748364869796934e-06, + "loss": 0.5728, + "step": 10423 + }, + { + "epoch": 0.74, + "grad_norm": 1.778457500294935, + "learning_rate": 1.6739783605591153e-06, + "loss": 0.5682, + "step": 10424 + }, + { + "epoch": 0.74, + "grad_norm": 1.6821392099697976, + "learning_rate": 1.6731204098302684e-06, + "loss": 0.4984, + "step": 10425 + }, + { + "epoch": 0.74, + "grad_norm": 1.673424745104213, + "learning_rate": 1.6722626348384746e-06, + "loss": 0.4734, + "step": 10426 + }, + { + "epoch": 0.74, + "grad_norm": 1.6486004789863549, + "learning_rate": 1.6714050356290445e-06, + "loss": 0.4768, + "step": 10427 + }, + { + "epoch": 0.74, + "grad_norm": 2.0346234412877657, + "learning_rate": 1.6705476122472786e-06, + "loss": 0.5796, + "step": 10428 + }, + { + "epoch": 0.74, + "grad_norm": 2.034375902730215, + "learning_rate": 1.6696903647384722e-06, + "loss": 0.4636, + "step": 10429 + }, + { + "epoch": 0.74, + "grad_norm": 2.077013018006939, + "learning_rate": 1.6688332931479012e-06, + "loss": 0.4784, + "step": 10430 + }, + { + "epoch": 0.74, + "grad_norm": 1.9325640624759457, + "learning_rate": 1.667976397520847e-06, + "loss": 0.4728, + "step": 10431 + }, + { + "epoch": 0.74, + "grad_norm": 1.6127831067676852, + "learning_rate": 1.6671196779025678e-06, + "loss": 0.4802, + "step": 10432 + }, + { + "epoch": 0.74, + "grad_norm": 1.538673351490178, + "learning_rate": 1.6662631343383222e-06, + "loss": 0.4803, + "step": 10433 + }, + { + "epoch": 0.74, + "grad_norm": 3.1941630998708828, + "learning_rate": 1.6654067668733515e-06, + "loss": 0.5381, + "step": 10434 + }, + { + "epoch": 0.74, + "grad_norm": 4.764459039099109, + "learning_rate": 1.6645505755528958e-06, + "loss": 0.5677, + "step": 10435 + }, + { + "epoch": 0.74, + "grad_norm": 1.644119138447471, + "learning_rate": 1.6636945604221822e-06, + "loss": 0.4869, + "step": 10436 + }, + { + "epoch": 0.74, + "grad_norm": 1.8814813450933507, + "learning_rate": 1.6628387215264252e-06, + "loss": 0.5823, + "step": 10437 + }, + { + "epoch": 0.74, + "grad_norm": 1.9149876593201713, + "learning_rate": 1.661983058910835e-06, + "loss": 0.5007, + "step": 10438 + }, + { + "epoch": 0.74, + "grad_norm": 2.4141258959374676, + "learning_rate": 1.66112757262061e-06, + "loss": 0.4768, + "step": 10439 + }, + { + "epoch": 0.74, + "grad_norm": 1.7520994297315047, + "learning_rate": 1.6602722627009426e-06, + "loss": 0.5605, + "step": 10440 + }, + { + "epoch": 0.74, + "grad_norm": 1.614663997261603, + "learning_rate": 1.6594171291970086e-06, + "loss": 0.5073, + "step": 10441 + }, + { + "epoch": 0.74, + "grad_norm": 1.845411779764572, + "learning_rate": 1.6585621721539814e-06, + "loss": 0.546, + "step": 10442 + }, + { + "epoch": 0.74, + "grad_norm": 0.8303856505958676, + "learning_rate": 1.6577073916170227e-06, + "loss": 0.4277, + "step": 10443 + }, + { + "epoch": 0.74, + "grad_norm": 1.6999103029327187, + "learning_rate": 1.6568527876312845e-06, + "loss": 0.5893, + "step": 10444 + }, + { + "epoch": 0.74, + "grad_norm": 2.089428611848198, + "learning_rate": 1.6559983602419123e-06, + "loss": 0.5572, + "step": 10445 + }, + { + "epoch": 0.74, + "grad_norm": 1.796832616236273, + "learning_rate": 1.655144109494034e-06, + "loss": 0.5026, + "step": 10446 + }, + { + "epoch": 0.74, + "grad_norm": 2.0366481534010057, + "learning_rate": 1.6542900354327813e-06, + "loss": 0.5486, + "step": 10447 + }, + { + "epoch": 0.74, + "grad_norm": 0.7069394734044736, + "learning_rate": 1.6534361381032643e-06, + "loss": 0.428, + "step": 10448 + }, + { + "epoch": 0.74, + "grad_norm": 1.9225174078373273, + "learning_rate": 1.65258241755059e-06, + "loss": 0.4959, + "step": 10449 + }, + { + "epoch": 0.74, + "grad_norm": 2.2663531745010843, + "learning_rate": 1.651728873819855e-06, + "loss": 0.576, + "step": 10450 + }, + { + "epoch": 0.74, + "grad_norm": 1.4848480139106952, + "learning_rate": 1.650875506956146e-06, + "loss": 0.496, + "step": 10451 + }, + { + "epoch": 0.74, + "grad_norm": 1.6867404433647457, + "learning_rate": 1.650022317004543e-06, + "loss": 0.5314, + "step": 10452 + }, + { + "epoch": 0.74, + "grad_norm": 2.010497067852514, + "learning_rate": 1.6491693040101098e-06, + "loss": 0.5685, + "step": 10453 + }, + { + "epoch": 0.74, + "grad_norm": 1.731167480927865, + "learning_rate": 1.648316468017911e-06, + "loss": 0.5521, + "step": 10454 + }, + { + "epoch": 0.74, + "grad_norm": 1.7001376783878632, + "learning_rate": 1.6474638090729916e-06, + "loss": 0.4898, + "step": 10455 + }, + { + "epoch": 0.74, + "grad_norm": 1.7116000092731083, + "learning_rate": 1.6466113272203937e-06, + "loss": 0.5335, + "step": 10456 + }, + { + "epoch": 0.74, + "grad_norm": 0.7295693826471109, + "learning_rate": 1.6457590225051484e-06, + "loss": 0.4403, + "step": 10457 + }, + { + "epoch": 0.74, + "grad_norm": 1.712322168164006, + "learning_rate": 1.6449068949722775e-06, + "loss": 0.554, + "step": 10458 + }, + { + "epoch": 0.74, + "grad_norm": 1.6914116024619779, + "learning_rate": 1.6440549446667942e-06, + "loss": 0.5551, + "step": 10459 + }, + { + "epoch": 0.74, + "grad_norm": 1.581935491863476, + "learning_rate": 1.6432031716336988e-06, + "loss": 0.5244, + "step": 10460 + }, + { + "epoch": 0.74, + "grad_norm": 1.61089403713094, + "learning_rate": 1.6423515759179865e-06, + "loss": 0.504, + "step": 10461 + }, + { + "epoch": 0.74, + "grad_norm": 3.9893262656655604, + "learning_rate": 1.6415001575646412e-06, + "loss": 0.5341, + "step": 10462 + }, + { + "epoch": 0.74, + "grad_norm": 1.8189755653690287, + "learning_rate": 1.6406489166186373e-06, + "loss": 0.5838, + "step": 10463 + }, + { + "epoch": 0.74, + "grad_norm": 1.8043261206860068, + "learning_rate": 1.6397978531249409e-06, + "loss": 0.5363, + "step": 10464 + }, + { + "epoch": 0.74, + "grad_norm": 2.2655373867020736, + "learning_rate": 1.638946967128508e-06, + "loss": 0.5217, + "step": 10465 + }, + { + "epoch": 0.74, + "grad_norm": 1.505133999515219, + "learning_rate": 1.6380962586742865e-06, + "loss": 0.517, + "step": 10466 + }, + { + "epoch": 0.74, + "grad_norm": 1.4141230442163966, + "learning_rate": 1.6372457278072107e-06, + "loss": 0.4839, + "step": 10467 + }, + { + "epoch": 0.74, + "grad_norm": 6.582674375931129, + "learning_rate": 1.63639537457221e-06, + "loss": 0.5071, + "step": 10468 + }, + { + "epoch": 0.74, + "grad_norm": 1.8354463835693777, + "learning_rate": 1.6355451990142028e-06, + "loss": 0.5668, + "step": 10469 + }, + { + "epoch": 0.74, + "grad_norm": 1.5745620154106472, + "learning_rate": 1.6346952011781008e-06, + "loss": 0.5474, + "step": 10470 + }, + { + "epoch": 0.74, + "grad_norm": 1.7788333448717903, + "learning_rate": 1.6338453811087996e-06, + "loss": 0.5219, + "step": 10471 + }, + { + "epoch": 0.74, + "grad_norm": 2.4823211679482955, + "learning_rate": 1.632995738851192e-06, + "loss": 0.5308, + "step": 10472 + }, + { + "epoch": 0.74, + "grad_norm": 1.7478644535050798, + "learning_rate": 1.6321462744501575e-06, + "loss": 0.5174, + "step": 10473 + }, + { + "epoch": 0.74, + "grad_norm": 1.6357623793709244, + "learning_rate": 1.631296987950569e-06, + "loss": 0.5449, + "step": 10474 + }, + { + "epoch": 0.74, + "grad_norm": 1.9629081222425508, + "learning_rate": 1.63044787939729e-06, + "loss": 0.5921, + "step": 10475 + }, + { + "epoch": 0.74, + "grad_norm": 1.9732968490301797, + "learning_rate": 1.6295989488351682e-06, + "loss": 0.5026, + "step": 10476 + }, + { + "epoch": 0.74, + "grad_norm": 1.7829393018748605, + "learning_rate": 1.628750196309054e-06, + "loss": 0.5283, + "step": 10477 + }, + { + "epoch": 0.74, + "grad_norm": 0.7256328329260381, + "learning_rate": 1.627901621863776e-06, + "loss": 0.4463, + "step": 10478 + }, + { + "epoch": 0.74, + "grad_norm": 1.8012742636433268, + "learning_rate": 1.6270532255441608e-06, + "loss": 0.5886, + "step": 10479 + }, + { + "epoch": 0.74, + "grad_norm": 1.5604780476144748, + "learning_rate": 1.6262050073950225e-06, + "loss": 0.5277, + "step": 10480 + }, + { + "epoch": 0.74, + "grad_norm": 1.760169445239215, + "learning_rate": 1.6253569674611686e-06, + "loss": 0.5424, + "step": 10481 + }, + { + "epoch": 0.74, + "grad_norm": 1.7360568362647566, + "learning_rate": 1.6245091057873956e-06, + "loss": 0.4367, + "step": 10482 + }, + { + "epoch": 0.74, + "grad_norm": 1.7388162130083156, + "learning_rate": 1.6236614224184866e-06, + "loss": 0.5544, + "step": 10483 + }, + { + "epoch": 0.74, + "grad_norm": 1.4971832304283674, + "learning_rate": 1.6228139173992248e-06, + "loss": 0.4999, + "step": 10484 + }, + { + "epoch": 0.74, + "grad_norm": 1.5734170549309998, + "learning_rate": 1.6219665907743736e-06, + "loss": 0.525, + "step": 10485 + }, + { + "epoch": 0.74, + "grad_norm": 1.885442262812269, + "learning_rate": 1.6211194425886934e-06, + "loss": 0.5022, + "step": 10486 + }, + { + "epoch": 0.74, + "grad_norm": 1.6523328012455403, + "learning_rate": 1.6202724728869336e-06, + "loss": 0.518, + "step": 10487 + }, + { + "epoch": 0.74, + "grad_norm": 1.8878318726837369, + "learning_rate": 1.6194256817138338e-06, + "loss": 0.5685, + "step": 10488 + }, + { + "epoch": 0.74, + "grad_norm": 1.8269859147306058, + "learning_rate": 1.6185790691141263e-06, + "loss": 0.4993, + "step": 10489 + }, + { + "epoch": 0.74, + "grad_norm": 1.7083580867608161, + "learning_rate": 1.6177326351325284e-06, + "loss": 0.5287, + "step": 10490 + }, + { + "epoch": 0.74, + "grad_norm": 1.683238402067469, + "learning_rate": 1.6168863798137536e-06, + "loss": 0.4902, + "step": 10491 + }, + { + "epoch": 0.74, + "grad_norm": 2.1131166105537824, + "learning_rate": 1.6160403032025034e-06, + "loss": 0.5479, + "step": 10492 + }, + { + "epoch": 0.74, + "grad_norm": 1.7882708144846682, + "learning_rate": 1.615194405343471e-06, + "loss": 0.5669, + "step": 10493 + }, + { + "epoch": 0.74, + "grad_norm": 1.5872525220638656, + "learning_rate": 1.6143486862813407e-06, + "loss": 0.4869, + "step": 10494 + }, + { + "epoch": 0.74, + "grad_norm": 5.346798934728638, + "learning_rate": 1.6135031460607819e-06, + "loss": 0.5017, + "step": 10495 + }, + { + "epoch": 0.74, + "grad_norm": 1.6014576561987575, + "learning_rate": 1.6126577847264656e-06, + "loss": 0.4556, + "step": 10496 + }, + { + "epoch": 0.74, + "grad_norm": 1.8333216252496483, + "learning_rate": 1.6118126023230414e-06, + "loss": 0.4925, + "step": 10497 + }, + { + "epoch": 0.74, + "grad_norm": 1.9077865484524617, + "learning_rate": 1.6109675988951555e-06, + "loss": 0.5476, + "step": 10498 + }, + { + "epoch": 0.75, + "grad_norm": 2.390198977760358, + "learning_rate": 1.6101227744874453e-06, + "loss": 0.5696, + "step": 10499 + }, + { + "epoch": 0.75, + "grad_norm": 0.7029415668056194, + "learning_rate": 1.609278129144538e-06, + "loss": 0.4164, + "step": 10500 + }, + { + "epoch": 0.75, + "grad_norm": 2.109878660666533, + "learning_rate": 1.6084336629110475e-06, + "loss": 0.5355, + "step": 10501 + }, + { + "epoch": 0.75, + "grad_norm": 1.7004587511779912, + "learning_rate": 1.6075893758315813e-06, + "loss": 0.4839, + "step": 10502 + }, + { + "epoch": 0.75, + "grad_norm": 2.097992435180401, + "learning_rate": 1.6067452679507434e-06, + "loss": 0.5764, + "step": 10503 + }, + { + "epoch": 0.75, + "grad_norm": 1.5943383940552824, + "learning_rate": 1.6059013393131161e-06, + "loss": 0.4627, + "step": 10504 + }, + { + "epoch": 0.75, + "grad_norm": 1.6665759002047902, + "learning_rate": 1.6050575899632826e-06, + "loss": 0.4274, + "step": 10505 + }, + { + "epoch": 0.75, + "grad_norm": 1.6095919574384423, + "learning_rate": 1.6042140199458077e-06, + "loss": 0.559, + "step": 10506 + }, + { + "epoch": 0.75, + "grad_norm": 1.6151377466704804, + "learning_rate": 1.603370629305258e-06, + "loss": 0.4201, + "step": 10507 + }, + { + "epoch": 0.75, + "grad_norm": 0.6876774323582775, + "learning_rate": 1.6025274180861793e-06, + "loss": 0.4234, + "step": 10508 + }, + { + "epoch": 0.75, + "grad_norm": 1.7057814339569395, + "learning_rate": 1.6016843863331145e-06, + "loss": 0.5365, + "step": 10509 + }, + { + "epoch": 0.75, + "grad_norm": 1.7113847373611897, + "learning_rate": 1.6008415340905953e-06, + "loss": 0.5012, + "step": 10510 + }, + { + "epoch": 0.75, + "grad_norm": 2.075098300799037, + "learning_rate": 1.5999988614031448e-06, + "loss": 0.4973, + "step": 10511 + }, + { + "epoch": 0.75, + "grad_norm": 1.5291551092016695, + "learning_rate": 1.599156368315276e-06, + "loss": 0.4439, + "step": 10512 + }, + { + "epoch": 0.75, + "grad_norm": 2.092371357322078, + "learning_rate": 1.5983140548714893e-06, + "loss": 0.4321, + "step": 10513 + }, + { + "epoch": 0.75, + "grad_norm": 2.475977271335705, + "learning_rate": 1.597471921116283e-06, + "loss": 0.5681, + "step": 10514 + }, + { + "epoch": 0.75, + "grad_norm": 1.5367767102513537, + "learning_rate": 1.5966299670941382e-06, + "loss": 0.4591, + "step": 10515 + }, + { + "epoch": 0.75, + "grad_norm": 1.4515114442294532, + "learning_rate": 1.59578819284953e-06, + "loss": 0.4992, + "step": 10516 + }, + { + "epoch": 0.75, + "grad_norm": 1.5597355951708003, + "learning_rate": 1.5949465984269252e-06, + "loss": 0.5063, + "step": 10517 + }, + { + "epoch": 0.75, + "grad_norm": 1.7233974701441424, + "learning_rate": 1.5941051838707788e-06, + "loss": 0.5908, + "step": 10518 + }, + { + "epoch": 0.75, + "grad_norm": 1.7795115205809668, + "learning_rate": 1.5932639492255387e-06, + "loss": 0.5009, + "step": 10519 + }, + { + "epoch": 0.75, + "grad_norm": 1.7023634159671173, + "learning_rate": 1.592422894535639e-06, + "loss": 0.5274, + "step": 10520 + }, + { + "epoch": 0.75, + "grad_norm": 2.2900454921527733, + "learning_rate": 1.5915820198455083e-06, + "loss": 0.506, + "step": 10521 + }, + { + "epoch": 0.75, + "grad_norm": 1.5852153479305722, + "learning_rate": 1.5907413251995645e-06, + "loss": 0.4587, + "step": 10522 + }, + { + "epoch": 0.75, + "grad_norm": 1.6076262723829147, + "learning_rate": 1.5899008106422166e-06, + "loss": 0.5701, + "step": 10523 + }, + { + "epoch": 0.75, + "grad_norm": 1.6681457132968678, + "learning_rate": 1.589060476217864e-06, + "loss": 0.5479, + "step": 10524 + }, + { + "epoch": 0.75, + "grad_norm": 0.6910785175580504, + "learning_rate": 1.5882203219708918e-06, + "loss": 0.4107, + "step": 10525 + }, + { + "epoch": 0.75, + "grad_norm": 1.5515541352812257, + "learning_rate": 1.5873803479456862e-06, + "loss": 0.5159, + "step": 10526 + }, + { + "epoch": 0.75, + "grad_norm": 2.4533807687539966, + "learning_rate": 1.5865405541866125e-06, + "loss": 0.6025, + "step": 10527 + }, + { + "epoch": 0.75, + "grad_norm": 1.6343628503124479, + "learning_rate": 1.585700940738033e-06, + "loss": 0.4717, + "step": 10528 + }, + { + "epoch": 0.75, + "grad_norm": 0.7476675367887125, + "learning_rate": 1.584861507644299e-06, + "loss": 0.4455, + "step": 10529 + }, + { + "epoch": 0.75, + "grad_norm": 1.5418299112602618, + "learning_rate": 1.5840222549497525e-06, + "loss": 0.5234, + "step": 10530 + }, + { + "epoch": 0.75, + "grad_norm": 2.496478664328739, + "learning_rate": 1.5831831826987265e-06, + "loss": 0.6286, + "step": 10531 + }, + { + "epoch": 0.75, + "grad_norm": 1.5757495961424142, + "learning_rate": 1.5823442909355396e-06, + "loss": 0.4491, + "step": 10532 + }, + { + "epoch": 0.75, + "grad_norm": 1.9372873790348948, + "learning_rate": 1.5815055797045114e-06, + "loss": 0.5036, + "step": 10533 + }, + { + "epoch": 0.75, + "grad_norm": 0.7707684437897937, + "learning_rate": 1.5806670490499398e-06, + "loss": 0.4355, + "step": 10534 + }, + { + "epoch": 0.75, + "grad_norm": 2.070519191230848, + "learning_rate": 1.5798286990161228e-06, + "loss": 0.5803, + "step": 10535 + }, + { + "epoch": 0.75, + "grad_norm": 1.9614394164162958, + "learning_rate": 1.5789905296473402e-06, + "loss": 0.5418, + "step": 10536 + }, + { + "epoch": 0.75, + "grad_norm": 1.7917863946270411, + "learning_rate": 1.5781525409878723e-06, + "loss": 0.5227, + "step": 10537 + }, + { + "epoch": 0.75, + "grad_norm": 1.8560592386632566, + "learning_rate": 1.577314733081981e-06, + "loss": 0.5506, + "step": 10538 + }, + { + "epoch": 0.75, + "grad_norm": 1.6879649386929427, + "learning_rate": 1.5764771059739226e-06, + "loss": 0.5161, + "step": 10539 + }, + { + "epoch": 0.75, + "grad_norm": 1.980745341681599, + "learning_rate": 1.5756396597079444e-06, + "loss": 0.5308, + "step": 10540 + }, + { + "epoch": 0.75, + "grad_norm": 1.70962004496072, + "learning_rate": 1.5748023943282826e-06, + "loss": 0.5317, + "step": 10541 + }, + { + "epoch": 0.75, + "grad_norm": 1.6834825958134105, + "learning_rate": 1.5739653098791657e-06, + "loss": 0.4819, + "step": 10542 + }, + { + "epoch": 0.75, + "grad_norm": 1.868616147458286, + "learning_rate": 1.573128406404807e-06, + "loss": 0.5222, + "step": 10543 + }, + { + "epoch": 0.75, + "grad_norm": 1.60210951861304, + "learning_rate": 1.5722916839494212e-06, + "loss": 0.4685, + "step": 10544 + }, + { + "epoch": 0.75, + "grad_norm": 2.1592221601056694, + "learning_rate": 1.5714551425572016e-06, + "loss": 0.5486, + "step": 10545 + }, + { + "epoch": 0.75, + "grad_norm": 0.6927347568386585, + "learning_rate": 1.570618782272339e-06, + "loss": 0.4226, + "step": 10546 + }, + { + "epoch": 0.75, + "grad_norm": 1.712883565939955, + "learning_rate": 1.5697826031390124e-06, + "loss": 0.5945, + "step": 10547 + }, + { + "epoch": 0.75, + "grad_norm": 1.6405186910954446, + "learning_rate": 1.568946605201392e-06, + "loss": 0.5348, + "step": 10548 + }, + { + "epoch": 0.75, + "grad_norm": 1.5340521854019662, + "learning_rate": 1.5681107885036395e-06, + "loss": 0.5331, + "step": 10549 + }, + { + "epoch": 0.75, + "grad_norm": 3.706074225740816, + "learning_rate": 1.5672751530899022e-06, + "loss": 0.4716, + "step": 10550 + }, + { + "epoch": 0.75, + "grad_norm": 2.5059568131667644, + "learning_rate": 1.5664396990043235e-06, + "loss": 0.4394, + "step": 10551 + }, + { + "epoch": 0.75, + "grad_norm": 1.8163896571578324, + "learning_rate": 1.5656044262910336e-06, + "loss": 0.5711, + "step": 10552 + }, + { + "epoch": 0.75, + "grad_norm": 3.206830705180215, + "learning_rate": 1.5647693349941556e-06, + "loss": 0.5363, + "step": 10553 + }, + { + "epoch": 0.75, + "grad_norm": 1.4996482187963185, + "learning_rate": 1.5639344251578037e-06, + "loss": 0.5065, + "step": 10554 + }, + { + "epoch": 0.75, + "grad_norm": 2.72390835082974, + "learning_rate": 1.5630996968260743e-06, + "loss": 0.5694, + "step": 10555 + }, + { + "epoch": 0.75, + "grad_norm": 1.6030333660602594, + "learning_rate": 1.5622651500430685e-06, + "loss": 0.5327, + "step": 10556 + }, + { + "epoch": 0.75, + "grad_norm": 1.7613705442948675, + "learning_rate": 1.5614307848528642e-06, + "loss": 0.4916, + "step": 10557 + }, + { + "epoch": 0.75, + "grad_norm": 2.4522919803883747, + "learning_rate": 1.560596601299537e-06, + "loss": 0.5297, + "step": 10558 + }, + { + "epoch": 0.75, + "grad_norm": 1.7244155153954548, + "learning_rate": 1.5597625994271526e-06, + "loss": 0.5303, + "step": 10559 + }, + { + "epoch": 0.75, + "grad_norm": 1.631481795986207, + "learning_rate": 1.558928779279764e-06, + "loss": 0.4773, + "step": 10560 + }, + { + "epoch": 0.75, + "grad_norm": 2.03245063943376, + "learning_rate": 1.558095140901419e-06, + "loss": 0.5751, + "step": 10561 + }, + { + "epoch": 0.75, + "grad_norm": 1.974918995766225, + "learning_rate": 1.5572616843361482e-06, + "loss": 0.4778, + "step": 10562 + }, + { + "epoch": 0.75, + "grad_norm": 1.5285489382674438, + "learning_rate": 1.5564284096279842e-06, + "loss": 0.4815, + "step": 10563 + }, + { + "epoch": 0.75, + "grad_norm": 1.8486274103134617, + "learning_rate": 1.5555953168209388e-06, + "loss": 0.511, + "step": 10564 + }, + { + "epoch": 0.75, + "grad_norm": 0.6815884474658551, + "learning_rate": 1.5547624059590205e-06, + "loss": 0.4185, + "step": 10565 + }, + { + "epoch": 0.75, + "grad_norm": 1.6976080859535605, + "learning_rate": 1.553929677086224e-06, + "loss": 0.5592, + "step": 10566 + }, + { + "epoch": 0.75, + "grad_norm": 1.8389683258075102, + "learning_rate": 1.5530971302465393e-06, + "loss": 0.571, + "step": 10567 + }, + { + "epoch": 0.75, + "grad_norm": 2.2126127615792184, + "learning_rate": 1.5522647654839463e-06, + "loss": 0.5754, + "step": 10568 + }, + { + "epoch": 0.75, + "grad_norm": 2.1721316387773975, + "learning_rate": 1.5514325828424087e-06, + "loss": 0.5489, + "step": 10569 + }, + { + "epoch": 0.75, + "grad_norm": 0.6486859796974606, + "learning_rate": 1.5506005823658882e-06, + "loss": 0.4519, + "step": 10570 + }, + { + "epoch": 0.75, + "grad_norm": 2.022804116013147, + "learning_rate": 1.5497687640983322e-06, + "loss": 0.4846, + "step": 10571 + }, + { + "epoch": 0.75, + "grad_norm": 1.6858171529752886, + "learning_rate": 1.5489371280836834e-06, + "loss": 0.541, + "step": 10572 + }, + { + "epoch": 0.75, + "grad_norm": 1.9118269638462828, + "learning_rate": 1.5481056743658673e-06, + "loss": 0.4646, + "step": 10573 + }, + { + "epoch": 0.75, + "grad_norm": 1.808211036897223, + "learning_rate": 1.547274402988806e-06, + "loss": 0.4882, + "step": 10574 + }, + { + "epoch": 0.75, + "grad_norm": 1.7831761138572344, + "learning_rate": 1.5464433139964102e-06, + "loss": 0.5383, + "step": 10575 + }, + { + "epoch": 0.75, + "grad_norm": 0.8271366007488715, + "learning_rate": 1.5456124074325813e-06, + "loss": 0.4213, + "step": 10576 + }, + { + "epoch": 0.75, + "grad_norm": 1.9366548898578648, + "learning_rate": 1.54478168334121e-06, + "loss": 0.5378, + "step": 10577 + }, + { + "epoch": 0.75, + "grad_norm": 0.680826441158982, + "learning_rate": 1.5439511417661774e-06, + "loss": 0.4388, + "step": 10578 + }, + { + "epoch": 0.75, + "grad_norm": 1.7966196531816798, + "learning_rate": 1.5431207827513583e-06, + "loss": 0.5142, + "step": 10579 + }, + { + "epoch": 0.75, + "grad_norm": 1.7021357834542508, + "learning_rate": 1.5422906063406113e-06, + "loss": 0.5408, + "step": 10580 + }, + { + "epoch": 0.75, + "grad_norm": 1.7826831807680679, + "learning_rate": 1.5414606125777909e-06, + "loss": 0.5259, + "step": 10581 + }, + { + "epoch": 0.75, + "grad_norm": 2.0601909195649655, + "learning_rate": 1.5406308015067395e-06, + "loss": 0.4754, + "step": 10582 + }, + { + "epoch": 0.75, + "grad_norm": 1.6626932677704283, + "learning_rate": 1.539801173171292e-06, + "loss": 0.5245, + "step": 10583 + }, + { + "epoch": 0.75, + "grad_norm": 1.5636834247415081, + "learning_rate": 1.538971727615272e-06, + "loss": 0.5477, + "step": 10584 + }, + { + "epoch": 0.75, + "grad_norm": 1.5277738934296092, + "learning_rate": 1.5381424648824906e-06, + "loss": 0.5121, + "step": 10585 + }, + { + "epoch": 0.75, + "grad_norm": 1.9488143339892243, + "learning_rate": 1.5373133850167571e-06, + "loss": 0.5093, + "step": 10586 + }, + { + "epoch": 0.75, + "grad_norm": 1.779370984788307, + "learning_rate": 1.5364844880618623e-06, + "loss": 0.492, + "step": 10587 + }, + { + "epoch": 0.75, + "grad_norm": 0.7089197687603893, + "learning_rate": 1.535655774061593e-06, + "loss": 0.4474, + "step": 10588 + }, + { + "epoch": 0.75, + "grad_norm": 1.7385138278494736, + "learning_rate": 1.5348272430597244e-06, + "loss": 0.5323, + "step": 10589 + }, + { + "epoch": 0.75, + "grad_norm": 1.7124121394045315, + "learning_rate": 1.5339988951000223e-06, + "loss": 0.5023, + "step": 10590 + }, + { + "epoch": 0.75, + "grad_norm": 1.675266686172874, + "learning_rate": 1.533170730226245e-06, + "loss": 0.4993, + "step": 10591 + }, + { + "epoch": 0.75, + "grad_norm": 1.9899787294938223, + "learning_rate": 1.5323427484821335e-06, + "loss": 0.4556, + "step": 10592 + }, + { + "epoch": 0.75, + "grad_norm": 1.643389436258584, + "learning_rate": 1.5315149499114312e-06, + "loss": 0.5631, + "step": 10593 + }, + { + "epoch": 0.75, + "grad_norm": 1.9029607579165586, + "learning_rate": 1.5306873345578604e-06, + "loss": 0.487, + "step": 10594 + }, + { + "epoch": 0.75, + "grad_norm": 1.601861228143719, + "learning_rate": 1.5298599024651405e-06, + "loss": 0.5172, + "step": 10595 + }, + { + "epoch": 0.75, + "grad_norm": 1.6417143393739395, + "learning_rate": 1.5290326536769789e-06, + "loss": 0.5443, + "step": 10596 + }, + { + "epoch": 0.75, + "grad_norm": 1.459931469995595, + "learning_rate": 1.528205588237074e-06, + "loss": 0.4863, + "step": 10597 + }, + { + "epoch": 0.75, + "grad_norm": 1.7914135208721658, + "learning_rate": 1.527378706189115e-06, + "loss": 0.5246, + "step": 10598 + }, + { + "epoch": 0.75, + "grad_norm": 1.9367377870755567, + "learning_rate": 1.5265520075767782e-06, + "loss": 0.5154, + "step": 10599 + }, + { + "epoch": 0.75, + "grad_norm": 1.8034822817888396, + "learning_rate": 1.5257254924437343e-06, + "loss": 0.5165, + "step": 10600 + }, + { + "epoch": 0.75, + "grad_norm": 1.4107578533780403, + "learning_rate": 1.5248991608336421e-06, + "loss": 0.5249, + "step": 10601 + }, + { + "epoch": 0.75, + "grad_norm": 2.0529019119313725, + "learning_rate": 1.524073012790153e-06, + "loss": 0.5143, + "step": 10602 + }, + { + "epoch": 0.75, + "grad_norm": 1.8190727879715343, + "learning_rate": 1.523247048356904e-06, + "loss": 0.4724, + "step": 10603 + }, + { + "epoch": 0.75, + "grad_norm": 1.6113187561466273, + "learning_rate": 1.5224212675775267e-06, + "loss": 0.5672, + "step": 10604 + }, + { + "epoch": 0.75, + "grad_norm": 2.16565101494338, + "learning_rate": 1.5215956704956415e-06, + "loss": 0.541, + "step": 10605 + }, + { + "epoch": 0.75, + "grad_norm": 1.9207470862072435, + "learning_rate": 1.5207702571548593e-06, + "loss": 0.5665, + "step": 10606 + }, + { + "epoch": 0.75, + "grad_norm": 1.7415394523381789, + "learning_rate": 1.519945027598782e-06, + "loss": 0.4826, + "step": 10607 + }, + { + "epoch": 0.75, + "grad_norm": 1.752841273068188, + "learning_rate": 1.5191199818710001e-06, + "loss": 0.5866, + "step": 10608 + }, + { + "epoch": 0.75, + "grad_norm": 1.8213531127537046, + "learning_rate": 1.518295120015097e-06, + "loss": 0.4931, + "step": 10609 + }, + { + "epoch": 0.75, + "grad_norm": 1.647190556206935, + "learning_rate": 1.5174704420746416e-06, + "loss": 0.5573, + "step": 10610 + }, + { + "epoch": 0.75, + "grad_norm": 1.6092606191379788, + "learning_rate": 1.5166459480931982e-06, + "loss": 0.5418, + "step": 10611 + }, + { + "epoch": 0.75, + "grad_norm": 1.7350699219259809, + "learning_rate": 1.5158216381143193e-06, + "loss": 0.486, + "step": 10612 + }, + { + "epoch": 0.75, + "grad_norm": 0.7459738584848288, + "learning_rate": 1.5149975121815469e-06, + "loss": 0.4113, + "step": 10613 + }, + { + "epoch": 0.75, + "grad_norm": 0.641178616146587, + "learning_rate": 1.5141735703384164e-06, + "loss": 0.3935, + "step": 10614 + }, + { + "epoch": 0.75, + "grad_norm": 1.7813011003941057, + "learning_rate": 1.5133498126284462e-06, + "loss": 0.4974, + "step": 10615 + }, + { + "epoch": 0.75, + "grad_norm": 1.7115000460670287, + "learning_rate": 1.5125262390951567e-06, + "loss": 0.5307, + "step": 10616 + }, + { + "epoch": 0.75, + "grad_norm": 1.8442863263768914, + "learning_rate": 1.5117028497820468e-06, + "loss": 0.5694, + "step": 10617 + }, + { + "epoch": 0.75, + "grad_norm": 1.7398189795154542, + "learning_rate": 1.5108796447326124e-06, + "loss": 0.489, + "step": 10618 + }, + { + "epoch": 0.75, + "grad_norm": 2.309662876245267, + "learning_rate": 1.510056623990338e-06, + "loss": 0.5085, + "step": 10619 + }, + { + "epoch": 0.75, + "grad_norm": 1.889164999736863, + "learning_rate": 1.5092337875986979e-06, + "loss": 0.5696, + "step": 10620 + }, + { + "epoch": 0.75, + "grad_norm": 2.8164331110931573, + "learning_rate": 1.5084111356011593e-06, + "loss": 0.5543, + "step": 10621 + }, + { + "epoch": 0.75, + "grad_norm": 3.0112979218009968, + "learning_rate": 1.5075886680411744e-06, + "loss": 0.4711, + "step": 10622 + }, + { + "epoch": 0.75, + "grad_norm": 1.71695761659884, + "learning_rate": 1.5067663849621894e-06, + "loss": 0.5502, + "step": 10623 + }, + { + "epoch": 0.75, + "grad_norm": 1.5982064913156648, + "learning_rate": 1.5059442864076407e-06, + "loss": 0.4946, + "step": 10624 + }, + { + "epoch": 0.75, + "grad_norm": 0.6241593659583228, + "learning_rate": 1.5051223724209547e-06, + "loss": 0.4315, + "step": 10625 + }, + { + "epoch": 0.75, + "grad_norm": 1.6089190494309604, + "learning_rate": 1.504300643045547e-06, + "loss": 0.5031, + "step": 10626 + }, + { + "epoch": 0.75, + "grad_norm": 1.5581866746379132, + "learning_rate": 1.5034790983248243e-06, + "loss": 0.4679, + "step": 10627 + }, + { + "epoch": 0.75, + "grad_norm": 1.5239901666291509, + "learning_rate": 1.5026577383021852e-06, + "loss": 0.5125, + "step": 10628 + }, + { + "epoch": 0.75, + "grad_norm": 1.5901763065985381, + "learning_rate": 1.501836563021013e-06, + "loss": 0.4958, + "step": 10629 + }, + { + "epoch": 0.75, + "grad_norm": 1.5760596647339957, + "learning_rate": 1.5010155725246872e-06, + "loss": 0.5044, + "step": 10630 + }, + { + "epoch": 0.75, + "grad_norm": 2.5948959757516312, + "learning_rate": 1.5001947668565753e-06, + "loss": 0.5266, + "step": 10631 + }, + { + "epoch": 0.75, + "grad_norm": 1.6151573997010014, + "learning_rate": 1.4993741460600358e-06, + "loss": 0.4562, + "step": 10632 + }, + { + "epoch": 0.75, + "grad_norm": 1.5757647653396676, + "learning_rate": 1.4985537101784143e-06, + "loss": 0.5316, + "step": 10633 + }, + { + "epoch": 0.75, + "grad_norm": 1.7078402249890485, + "learning_rate": 1.497733459255049e-06, + "loss": 0.5506, + "step": 10634 + }, + { + "epoch": 0.75, + "grad_norm": 1.5952281306193585, + "learning_rate": 1.4969133933332725e-06, + "loss": 0.5569, + "step": 10635 + }, + { + "epoch": 0.75, + "grad_norm": 1.805018857226252, + "learning_rate": 1.496093512456399e-06, + "loss": 0.56, + "step": 10636 + }, + { + "epoch": 0.75, + "grad_norm": 1.9116389656346477, + "learning_rate": 1.495273816667741e-06, + "loss": 0.509, + "step": 10637 + }, + { + "epoch": 0.75, + "grad_norm": 1.6188495359660995, + "learning_rate": 1.4944543060105927e-06, + "loss": 0.5524, + "step": 10638 + }, + { + "epoch": 0.75, + "grad_norm": 2.078024210505921, + "learning_rate": 1.4936349805282492e-06, + "loss": 0.5228, + "step": 10639 + }, + { + "epoch": 0.76, + "grad_norm": 1.575664277284591, + "learning_rate": 1.4928158402639852e-06, + "loss": 0.4744, + "step": 10640 + }, + { + "epoch": 0.76, + "grad_norm": 0.6060302950906317, + "learning_rate": 1.4919968852610728e-06, + "loss": 0.4375, + "step": 10641 + }, + { + "epoch": 0.76, + "grad_norm": 1.8948057755284016, + "learning_rate": 1.491178115562772e-06, + "loss": 0.5376, + "step": 10642 + }, + { + "epoch": 0.76, + "grad_norm": 1.526267861905416, + "learning_rate": 1.4903595312123321e-06, + "loss": 0.4941, + "step": 10643 + }, + { + "epoch": 0.76, + "grad_norm": 1.8339647936393995, + "learning_rate": 1.4895411322529956e-06, + "loss": 0.5098, + "step": 10644 + }, + { + "epoch": 0.76, + "grad_norm": 2.138151412327116, + "learning_rate": 1.488722918727989e-06, + "loss": 0.4921, + "step": 10645 + }, + { + "epoch": 0.76, + "grad_norm": 3.9081826770571655, + "learning_rate": 1.4879048906805388e-06, + "loss": 0.4607, + "step": 10646 + }, + { + "epoch": 0.76, + "grad_norm": 1.563515562211448, + "learning_rate": 1.4870870481538508e-06, + "loss": 0.4824, + "step": 10647 + }, + { + "epoch": 0.76, + "grad_norm": 1.899819854147182, + "learning_rate": 1.4862693911911286e-06, + "loss": 0.4856, + "step": 10648 + }, + { + "epoch": 0.76, + "grad_norm": 1.782140482090628, + "learning_rate": 1.4854519198355633e-06, + "loss": 0.5292, + "step": 10649 + }, + { + "epoch": 0.76, + "grad_norm": 1.8402595544170228, + "learning_rate": 1.4846346341303363e-06, + "loss": 0.5989, + "step": 10650 + }, + { + "epoch": 0.76, + "grad_norm": 1.6722973764078668, + "learning_rate": 1.4838175341186212e-06, + "loss": 0.5475, + "step": 10651 + }, + { + "epoch": 0.76, + "grad_norm": 1.9788795017011398, + "learning_rate": 1.4830006198435771e-06, + "loss": 0.4595, + "step": 10652 + }, + { + "epoch": 0.76, + "grad_norm": 0.7128330190419396, + "learning_rate": 1.4821838913483572e-06, + "loss": 0.4148, + "step": 10653 + }, + { + "epoch": 0.76, + "grad_norm": 1.8108972455284293, + "learning_rate": 1.4813673486761039e-06, + "loss": 0.5129, + "step": 10654 + }, + { + "epoch": 0.76, + "grad_norm": 1.6221522997641011, + "learning_rate": 1.4805509918699507e-06, + "loss": 0.5082, + "step": 10655 + }, + { + "epoch": 0.76, + "grad_norm": 1.7417044562230422, + "learning_rate": 1.4797348209730188e-06, + "loss": 0.5672, + "step": 10656 + }, + { + "epoch": 0.76, + "grad_norm": 1.5801951396392688, + "learning_rate": 1.4789188360284223e-06, + "loss": 0.5204, + "step": 10657 + }, + { + "epoch": 0.76, + "grad_norm": 1.4968218956063872, + "learning_rate": 1.478103037079266e-06, + "loss": 0.4329, + "step": 10658 + }, + { + "epoch": 0.76, + "grad_norm": 1.4940954964448796, + "learning_rate": 1.4772874241686391e-06, + "loss": 0.491, + "step": 10659 + }, + { + "epoch": 0.76, + "grad_norm": 1.5655389373557391, + "learning_rate": 1.4764719973396269e-06, + "loss": 0.4908, + "step": 10660 + }, + { + "epoch": 0.76, + "grad_norm": 1.649701363073898, + "learning_rate": 1.4756567566353037e-06, + "loss": 0.4976, + "step": 10661 + }, + { + "epoch": 0.76, + "grad_norm": 1.8619229021719754, + "learning_rate": 1.4748417020987322e-06, + "loss": 0.4679, + "step": 10662 + }, + { + "epoch": 0.76, + "grad_norm": 3.0286197317647723, + "learning_rate": 1.4740268337729684e-06, + "loss": 0.4459, + "step": 10663 + }, + { + "epoch": 0.76, + "grad_norm": 1.4768557174194714, + "learning_rate": 1.4732121517010518e-06, + "loss": 0.5001, + "step": 10664 + }, + { + "epoch": 0.76, + "grad_norm": 1.7256660922611382, + "learning_rate": 1.4723976559260234e-06, + "loss": 0.5622, + "step": 10665 + }, + { + "epoch": 0.76, + "grad_norm": 1.7359346787799372, + "learning_rate": 1.471583346490902e-06, + "loss": 0.5457, + "step": 10666 + }, + { + "epoch": 0.76, + "grad_norm": 1.529220143912608, + "learning_rate": 1.4707692234387055e-06, + "loss": 0.483, + "step": 10667 + }, + { + "epoch": 0.76, + "grad_norm": 0.6626058888944303, + "learning_rate": 1.4699552868124339e-06, + "loss": 0.4269, + "step": 10668 + }, + { + "epoch": 0.76, + "grad_norm": 1.4868942135751833, + "learning_rate": 1.4691415366550893e-06, + "loss": 0.5155, + "step": 10669 + }, + { + "epoch": 0.76, + "grad_norm": 1.5112318244480527, + "learning_rate": 1.4683279730096512e-06, + "loss": 0.4939, + "step": 10670 + }, + { + "epoch": 0.76, + "grad_norm": 1.8057538409751797, + "learning_rate": 1.4675145959190945e-06, + "loss": 0.4625, + "step": 10671 + }, + { + "epoch": 0.76, + "grad_norm": 1.6114983304995802, + "learning_rate": 1.46670140542639e-06, + "loss": 0.5652, + "step": 10672 + }, + { + "epoch": 0.76, + "grad_norm": 1.67434590180966, + "learning_rate": 1.4658884015744884e-06, + "loss": 0.5353, + "step": 10673 + }, + { + "epoch": 0.76, + "grad_norm": 4.65392258370727, + "learning_rate": 1.465075584406338e-06, + "loss": 0.5359, + "step": 10674 + }, + { + "epoch": 0.76, + "grad_norm": 1.514195136264466, + "learning_rate": 1.4642629539648701e-06, + "loss": 0.5118, + "step": 10675 + }, + { + "epoch": 0.76, + "grad_norm": 1.4945172739297936, + "learning_rate": 1.4634505102930163e-06, + "loss": 0.5151, + "step": 10676 + }, + { + "epoch": 0.76, + "grad_norm": 1.5406396533403712, + "learning_rate": 1.462638253433689e-06, + "loss": 0.4996, + "step": 10677 + }, + { + "epoch": 0.76, + "grad_norm": 1.787443911491598, + "learning_rate": 1.4618261834297952e-06, + "loss": 0.5226, + "step": 10678 + }, + { + "epoch": 0.76, + "grad_norm": 1.8455987551687656, + "learning_rate": 1.4610143003242316e-06, + "loss": 0.5735, + "step": 10679 + }, + { + "epoch": 0.76, + "grad_norm": 1.766502041386081, + "learning_rate": 1.4602026041598838e-06, + "loss": 0.4929, + "step": 10680 + }, + { + "epoch": 0.76, + "grad_norm": 1.944532504643116, + "learning_rate": 1.459391094979631e-06, + "loss": 0.5726, + "step": 10681 + }, + { + "epoch": 0.76, + "grad_norm": 1.5302360011048066, + "learning_rate": 1.4585797728263363e-06, + "loss": 0.5214, + "step": 10682 + }, + { + "epoch": 0.76, + "grad_norm": 1.7718192933221877, + "learning_rate": 1.4577686377428573e-06, + "loss": 0.5415, + "step": 10683 + }, + { + "epoch": 0.76, + "grad_norm": 2.0937012353019915, + "learning_rate": 1.4569576897720423e-06, + "loss": 0.5282, + "step": 10684 + }, + { + "epoch": 0.76, + "grad_norm": 1.7610264414283165, + "learning_rate": 1.4561469289567276e-06, + "loss": 0.5357, + "step": 10685 + }, + { + "epoch": 0.76, + "grad_norm": 1.999607693380123, + "learning_rate": 1.4553363553397414e-06, + "loss": 0.5139, + "step": 10686 + }, + { + "epoch": 0.76, + "grad_norm": 0.7319133603518555, + "learning_rate": 1.4545259689638968e-06, + "loss": 0.4205, + "step": 10687 + }, + { + "epoch": 0.76, + "grad_norm": 1.6458117214361008, + "learning_rate": 1.4537157698720078e-06, + "loss": 0.4931, + "step": 10688 + }, + { + "epoch": 0.76, + "grad_norm": 4.273140218189718, + "learning_rate": 1.4529057581068661e-06, + "loss": 0.5863, + "step": 10689 + }, + { + "epoch": 0.76, + "grad_norm": 1.6473670781037948, + "learning_rate": 1.4520959337112616e-06, + "loss": 0.5247, + "step": 10690 + }, + { + "epoch": 0.76, + "grad_norm": 3.2844654535239934, + "learning_rate": 1.4512862967279723e-06, + "loss": 0.5588, + "step": 10691 + }, + { + "epoch": 0.76, + "grad_norm": 4.002149913091146, + "learning_rate": 1.450476847199765e-06, + "loss": 0.5292, + "step": 10692 + }, + { + "epoch": 0.76, + "grad_norm": 1.8126966015498267, + "learning_rate": 1.4496675851693997e-06, + "loss": 0.4759, + "step": 10693 + }, + { + "epoch": 0.76, + "grad_norm": 1.730106746541734, + "learning_rate": 1.44885851067962e-06, + "loss": 0.574, + "step": 10694 + }, + { + "epoch": 0.76, + "grad_norm": 1.7648808117702592, + "learning_rate": 1.4480496237731694e-06, + "loss": 0.4735, + "step": 10695 + }, + { + "epoch": 0.76, + "grad_norm": 1.5552632130396524, + "learning_rate": 1.4472409244927721e-06, + "loss": 0.4498, + "step": 10696 + }, + { + "epoch": 0.76, + "grad_norm": 0.7195631372753983, + "learning_rate": 1.4464324128811486e-06, + "loss": 0.4074, + "step": 10697 + }, + { + "epoch": 0.76, + "grad_norm": 1.8913628342970745, + "learning_rate": 1.4456240889810036e-06, + "loss": 0.5087, + "step": 10698 + }, + { + "epoch": 0.76, + "grad_norm": 1.4838989943157586, + "learning_rate": 1.4448159528350391e-06, + "loss": 0.5257, + "step": 10699 + }, + { + "epoch": 0.76, + "grad_norm": 1.8783356377653806, + "learning_rate": 1.4440080044859444e-06, + "loss": 0.5549, + "step": 10700 + }, + { + "epoch": 0.76, + "grad_norm": 1.5347467908333357, + "learning_rate": 1.4432002439763947e-06, + "loss": 0.4645, + "step": 10701 + }, + { + "epoch": 0.76, + "grad_norm": 1.5897542757523386, + "learning_rate": 1.4423926713490605e-06, + "loss": 0.4767, + "step": 10702 + }, + { + "epoch": 0.76, + "grad_norm": 2.01805779961288, + "learning_rate": 1.4415852866465996e-06, + "loss": 0.4746, + "step": 10703 + }, + { + "epoch": 0.76, + "grad_norm": 1.8168347799452822, + "learning_rate": 1.4407780899116635e-06, + "loss": 0.5455, + "step": 10704 + }, + { + "epoch": 0.76, + "grad_norm": 1.7020611176928275, + "learning_rate": 1.4399710811868855e-06, + "loss": 0.576, + "step": 10705 + }, + { + "epoch": 0.76, + "grad_norm": 1.7232590412084658, + "learning_rate": 1.4391642605149015e-06, + "loss": 0.5067, + "step": 10706 + }, + { + "epoch": 0.76, + "grad_norm": 1.8275071233720077, + "learning_rate": 1.4383576279383249e-06, + "loss": 0.4832, + "step": 10707 + }, + { + "epoch": 0.76, + "grad_norm": 3.2919671787304994, + "learning_rate": 1.4375511834997675e-06, + "loss": 0.4615, + "step": 10708 + }, + { + "epoch": 0.76, + "grad_norm": 1.5733899964307254, + "learning_rate": 1.4367449272418277e-06, + "loss": 0.5101, + "step": 10709 + }, + { + "epoch": 0.76, + "grad_norm": 1.697898425723707, + "learning_rate": 1.4359388592070944e-06, + "loss": 0.4929, + "step": 10710 + }, + { + "epoch": 0.76, + "grad_norm": 1.8787909848683368, + "learning_rate": 1.435132979438149e-06, + "loss": 0.5485, + "step": 10711 + }, + { + "epoch": 0.76, + "grad_norm": 0.613462879827104, + "learning_rate": 1.4343272879775577e-06, + "loss": 0.408, + "step": 10712 + }, + { + "epoch": 0.76, + "grad_norm": 1.6619895628544024, + "learning_rate": 1.4335217848678811e-06, + "loss": 0.4491, + "step": 10713 + }, + { + "epoch": 0.76, + "grad_norm": 1.6100993244451636, + "learning_rate": 1.4327164701516683e-06, + "loss": 0.4796, + "step": 10714 + }, + { + "epoch": 0.76, + "grad_norm": 1.896784020065865, + "learning_rate": 1.4319113438714599e-06, + "loss": 0.5217, + "step": 10715 + }, + { + "epoch": 0.76, + "grad_norm": 1.6669638644168543, + "learning_rate": 1.4311064060697855e-06, + "loss": 0.5415, + "step": 10716 + }, + { + "epoch": 0.76, + "grad_norm": 1.7126172593838513, + "learning_rate": 1.4303016567891609e-06, + "loss": 0.5096, + "step": 10717 + }, + { + "epoch": 0.76, + "grad_norm": 1.707240071979739, + "learning_rate": 1.4294970960721023e-06, + "loss": 0.5036, + "step": 10718 + }, + { + "epoch": 0.76, + "grad_norm": 2.294482582179599, + "learning_rate": 1.4286927239611032e-06, + "loss": 0.5228, + "step": 10719 + }, + { + "epoch": 0.76, + "grad_norm": 1.548492764423679, + "learning_rate": 1.427888540498656e-06, + "loss": 0.5669, + "step": 10720 + }, + { + "epoch": 0.76, + "grad_norm": 1.489976931028792, + "learning_rate": 1.4270845457272398e-06, + "loss": 0.4885, + "step": 10721 + }, + { + "epoch": 0.76, + "grad_norm": 2.145834950694346, + "learning_rate": 1.4262807396893253e-06, + "loss": 0.5075, + "step": 10722 + }, + { + "epoch": 0.76, + "grad_norm": 1.6544547435093089, + "learning_rate": 1.4254771224273723e-06, + "loss": 0.5448, + "step": 10723 + }, + { + "epoch": 0.76, + "grad_norm": 1.6651615091922791, + "learning_rate": 1.4246736939838267e-06, + "loss": 0.4897, + "step": 10724 + }, + { + "epoch": 0.76, + "grad_norm": 1.4593585620603162, + "learning_rate": 1.4238704544011351e-06, + "loss": 0.5003, + "step": 10725 + }, + { + "epoch": 0.76, + "grad_norm": 1.7245978761357388, + "learning_rate": 1.4230674037217218e-06, + "loss": 0.4534, + "step": 10726 + }, + { + "epoch": 0.76, + "grad_norm": 2.5848753525251906, + "learning_rate": 1.4222645419880088e-06, + "loss": 0.5061, + "step": 10727 + }, + { + "epoch": 0.76, + "grad_norm": 1.6296942640530503, + "learning_rate": 1.421461869242406e-06, + "loss": 0.4651, + "step": 10728 + }, + { + "epoch": 0.76, + "grad_norm": 1.6256920746644146, + "learning_rate": 1.420659385527312e-06, + "loss": 0.5225, + "step": 10729 + }, + { + "epoch": 0.76, + "grad_norm": 1.8940850447915853, + "learning_rate": 1.41985709088512e-06, + "loss": 0.4523, + "step": 10730 + }, + { + "epoch": 0.76, + "grad_norm": 1.6349970355833847, + "learning_rate": 1.4190549853582058e-06, + "loss": 0.4886, + "step": 10731 + }, + { + "epoch": 0.76, + "grad_norm": 1.9063994496970478, + "learning_rate": 1.4182530689889417e-06, + "loss": 0.4775, + "step": 10732 + }, + { + "epoch": 0.76, + "grad_norm": 1.7125627742039657, + "learning_rate": 1.4174513418196867e-06, + "loss": 0.4586, + "step": 10733 + }, + { + "epoch": 0.76, + "grad_norm": 1.7510900369627114, + "learning_rate": 1.4166498038927923e-06, + "loss": 0.5567, + "step": 10734 + }, + { + "epoch": 0.76, + "grad_norm": 1.7634913914957377, + "learning_rate": 1.4158484552505947e-06, + "loss": 0.5058, + "step": 10735 + }, + { + "epoch": 0.76, + "grad_norm": 1.5729081779198033, + "learning_rate": 1.4150472959354295e-06, + "loss": 0.4976, + "step": 10736 + }, + { + "epoch": 0.76, + "grad_norm": 1.8632113637130792, + "learning_rate": 1.4142463259896123e-06, + "loss": 0.5066, + "step": 10737 + }, + { + "epoch": 0.76, + "grad_norm": 1.6688570314621418, + "learning_rate": 1.413445545455454e-06, + "loss": 0.4868, + "step": 10738 + }, + { + "epoch": 0.76, + "grad_norm": 0.7023971128056485, + "learning_rate": 1.4126449543752551e-06, + "loss": 0.4413, + "step": 10739 + }, + { + "epoch": 0.76, + "grad_norm": 1.6909637471205259, + "learning_rate": 1.411844552791305e-06, + "loss": 0.5009, + "step": 10740 + }, + { + "epoch": 0.76, + "grad_norm": 2.1619395615344295, + "learning_rate": 1.4110443407458856e-06, + "loss": 0.5483, + "step": 10741 + }, + { + "epoch": 0.76, + "grad_norm": 1.6149214819350879, + "learning_rate": 1.4102443182812642e-06, + "loss": 0.4382, + "step": 10742 + }, + { + "epoch": 0.76, + "grad_norm": 1.8694029230294038, + "learning_rate": 1.409444485439701e-06, + "loss": 0.5435, + "step": 10743 + }, + { + "epoch": 0.76, + "grad_norm": 1.6490457447757367, + "learning_rate": 1.408644842263447e-06, + "loss": 0.5389, + "step": 10744 + }, + { + "epoch": 0.76, + "grad_norm": 1.6000138393444086, + "learning_rate": 1.4078453887947407e-06, + "loss": 0.5064, + "step": 10745 + }, + { + "epoch": 0.76, + "grad_norm": 1.717760476677669, + "learning_rate": 1.4070461250758156e-06, + "loss": 0.4971, + "step": 10746 + }, + { + "epoch": 0.76, + "grad_norm": 1.5467693725305895, + "learning_rate": 1.4062470511488845e-06, + "loss": 0.4635, + "step": 10747 + }, + { + "epoch": 0.76, + "grad_norm": 1.7248685137234698, + "learning_rate": 1.4054481670561653e-06, + "loss": 0.5078, + "step": 10748 + }, + { + "epoch": 0.76, + "grad_norm": 1.6763223195559056, + "learning_rate": 1.4046494728398524e-06, + "loss": 0.5552, + "step": 10749 + }, + { + "epoch": 0.76, + "grad_norm": 1.5971850813479491, + "learning_rate": 1.403850968542137e-06, + "loss": 0.4917, + "step": 10750 + }, + { + "epoch": 0.76, + "grad_norm": 2.1171878568620532, + "learning_rate": 1.4030526542051985e-06, + "loss": 0.4949, + "step": 10751 + }, + { + "epoch": 0.76, + "grad_norm": 1.8266235665584014, + "learning_rate": 1.402254529871207e-06, + "loss": 0.5276, + "step": 10752 + }, + { + "epoch": 0.76, + "grad_norm": 1.537600336842719, + "learning_rate": 1.401456595582324e-06, + "loss": 0.5503, + "step": 10753 + }, + { + "epoch": 0.76, + "grad_norm": 1.5732412234281759, + "learning_rate": 1.400658851380694e-06, + "loss": 0.4895, + "step": 10754 + }, + { + "epoch": 0.76, + "grad_norm": 2.5202157508929215, + "learning_rate": 1.3998612973084623e-06, + "loss": 0.5683, + "step": 10755 + }, + { + "epoch": 0.76, + "grad_norm": 0.7205810359616004, + "learning_rate": 1.3990639334077549e-06, + "loss": 0.4228, + "step": 10756 + }, + { + "epoch": 0.76, + "grad_norm": 1.8458442072661652, + "learning_rate": 1.3982667597206917e-06, + "loss": 0.495, + "step": 10757 + }, + { + "epoch": 0.76, + "grad_norm": 0.7493755654774225, + "learning_rate": 1.3974697762893829e-06, + "loss": 0.4334, + "step": 10758 + }, + { + "epoch": 0.76, + "grad_norm": 2.0170758999995666, + "learning_rate": 1.3966729831559272e-06, + "loss": 0.5544, + "step": 10759 + }, + { + "epoch": 0.76, + "grad_norm": 1.5806114980049066, + "learning_rate": 1.3958763803624159e-06, + "loss": 0.539, + "step": 10760 + }, + { + "epoch": 0.76, + "grad_norm": 1.7386390559295708, + "learning_rate": 1.3950799679509252e-06, + "loss": 0.5126, + "step": 10761 + }, + { + "epoch": 0.76, + "grad_norm": 1.780698069041306, + "learning_rate": 1.3942837459635262e-06, + "loss": 0.4859, + "step": 10762 + }, + { + "epoch": 0.76, + "grad_norm": 1.5833203439517853, + "learning_rate": 1.393487714442277e-06, + "loss": 0.55, + "step": 10763 + }, + { + "epoch": 0.76, + "grad_norm": 1.7703132117391545, + "learning_rate": 1.392691873429229e-06, + "loss": 0.5311, + "step": 10764 + }, + { + "epoch": 0.76, + "grad_norm": 1.7591659013846739, + "learning_rate": 1.3918962229664174e-06, + "loss": 0.4928, + "step": 10765 + }, + { + "epoch": 0.76, + "grad_norm": 2.3766931020872804, + "learning_rate": 1.3911007630958723e-06, + "loss": 0.5052, + "step": 10766 + }, + { + "epoch": 0.76, + "grad_norm": 1.4238150525240583, + "learning_rate": 1.390305493859616e-06, + "loss": 0.4587, + "step": 10767 + }, + { + "epoch": 0.76, + "grad_norm": 1.486732409296287, + "learning_rate": 1.3895104152996542e-06, + "loss": 0.4477, + "step": 10768 + }, + { + "epoch": 0.76, + "grad_norm": 4.470421234028526, + "learning_rate": 1.3887155274579855e-06, + "loss": 0.5735, + "step": 10769 + }, + { + "epoch": 0.76, + "grad_norm": 1.9718124236404266, + "learning_rate": 1.3879208303765996e-06, + "loss": 0.508, + "step": 10770 + }, + { + "epoch": 0.76, + "grad_norm": 1.6352607679428355, + "learning_rate": 1.3871263240974764e-06, + "loss": 0.5242, + "step": 10771 + }, + { + "epoch": 0.76, + "grad_norm": 6.64926080617895, + "learning_rate": 1.3863320086625814e-06, + "loss": 0.5503, + "step": 10772 + }, + { + "epoch": 0.76, + "grad_norm": 1.9877700749720486, + "learning_rate": 1.3855378841138744e-06, + "loss": 0.5142, + "step": 10773 + }, + { + "epoch": 0.76, + "grad_norm": 1.6574324579675601, + "learning_rate": 1.3847439504933036e-06, + "loss": 0.5299, + "step": 10774 + }, + { + "epoch": 0.76, + "grad_norm": 1.673189786737734, + "learning_rate": 1.383950207842808e-06, + "loss": 0.5598, + "step": 10775 + }, + { + "epoch": 0.76, + "grad_norm": 1.6265973083249583, + "learning_rate": 1.3831566562043165e-06, + "loss": 0.5392, + "step": 10776 + }, + { + "epoch": 0.76, + "grad_norm": 0.7985231841081898, + "learning_rate": 1.3823632956197431e-06, + "loss": 0.4412, + "step": 10777 + }, + { + "epoch": 0.76, + "grad_norm": 1.7135545892219708, + "learning_rate": 1.3815701261310011e-06, + "loss": 0.6281, + "step": 10778 + }, + { + "epoch": 0.76, + "grad_norm": 1.618624885081931, + "learning_rate": 1.3807771477799848e-06, + "loss": 0.4682, + "step": 10779 + }, + { + "epoch": 0.76, + "grad_norm": 1.7250185364305821, + "learning_rate": 1.3799843606085834e-06, + "loss": 0.5448, + "step": 10780 + }, + { + "epoch": 0.77, + "grad_norm": 1.896743668037169, + "learning_rate": 1.3791917646586738e-06, + "loss": 0.4985, + "step": 10781 + }, + { + "epoch": 0.77, + "grad_norm": 1.6575098190862174, + "learning_rate": 1.3783993599721246e-06, + "loss": 0.4842, + "step": 10782 + }, + { + "epoch": 0.77, + "grad_norm": 0.6646799959977178, + "learning_rate": 1.3776071465907947e-06, + "loss": 0.4199, + "step": 10783 + }, + { + "epoch": 0.77, + "grad_norm": 1.6072581843289775, + "learning_rate": 1.376815124556526e-06, + "loss": 0.5164, + "step": 10784 + }, + { + "epoch": 0.77, + "grad_norm": 1.6995449140290093, + "learning_rate": 1.3760232939111628e-06, + "loss": 0.5051, + "step": 10785 + }, + { + "epoch": 0.77, + "grad_norm": 1.9830614836395266, + "learning_rate": 1.3752316546965273e-06, + "loss": 0.5345, + "step": 10786 + }, + { + "epoch": 0.77, + "grad_norm": 1.9445254194837112, + "learning_rate": 1.3744402069544383e-06, + "loss": 0.5365, + "step": 10787 + }, + { + "epoch": 0.77, + "grad_norm": 2.1075986754908067, + "learning_rate": 1.3736489507267026e-06, + "loss": 0.5288, + "step": 10788 + }, + { + "epoch": 0.77, + "grad_norm": 1.6220517537654202, + "learning_rate": 1.3728578860551168e-06, + "loss": 0.5854, + "step": 10789 + }, + { + "epoch": 0.77, + "grad_norm": 1.8437658547550106, + "learning_rate": 1.3720670129814695e-06, + "loss": 0.5561, + "step": 10790 + }, + { + "epoch": 0.77, + "grad_norm": 1.5569581175004117, + "learning_rate": 1.3712763315475347e-06, + "loss": 0.5243, + "step": 10791 + }, + { + "epoch": 0.77, + "grad_norm": 1.4876635475661717, + "learning_rate": 1.3704858417950796e-06, + "loss": 0.5057, + "step": 10792 + }, + { + "epoch": 0.77, + "grad_norm": 1.7237070729382227, + "learning_rate": 1.3696955437658605e-06, + "loss": 0.5354, + "step": 10793 + }, + { + "epoch": 0.77, + "grad_norm": 1.6329757577962747, + "learning_rate": 1.3689054375016236e-06, + "loss": 0.5325, + "step": 10794 + }, + { + "epoch": 0.77, + "grad_norm": 1.8207530199229034, + "learning_rate": 1.368115523044108e-06, + "loss": 0.5187, + "step": 10795 + }, + { + "epoch": 0.77, + "grad_norm": 1.96717407545796, + "learning_rate": 1.3673258004350332e-06, + "loss": 0.485, + "step": 10796 + }, + { + "epoch": 0.77, + "grad_norm": 4.820840660173064, + "learning_rate": 1.3665362697161216e-06, + "loss": 0.5432, + "step": 10797 + }, + { + "epoch": 0.77, + "grad_norm": 1.706725214050547, + "learning_rate": 1.365746930929075e-06, + "loss": 0.4918, + "step": 10798 + }, + { + "epoch": 0.77, + "grad_norm": 1.5185241550654174, + "learning_rate": 1.3649577841155914e-06, + "loss": 0.5179, + "step": 10799 + }, + { + "epoch": 0.77, + "grad_norm": 1.358417799735522, + "learning_rate": 1.3641688293173516e-06, + "loss": 0.4618, + "step": 10800 + }, + { + "epoch": 0.77, + "grad_norm": 1.5536970917126065, + "learning_rate": 1.3633800665760377e-06, + "loss": 0.5222, + "step": 10801 + }, + { + "epoch": 0.77, + "grad_norm": 0.7303642229592839, + "learning_rate": 1.3625914959333092e-06, + "loss": 0.4291, + "step": 10802 + }, + { + "epoch": 0.77, + "grad_norm": 1.7863602360718673, + "learning_rate": 1.3618031174308221e-06, + "loss": 0.5382, + "step": 10803 + }, + { + "epoch": 0.77, + "grad_norm": 1.5937928157830576, + "learning_rate": 1.361014931110225e-06, + "loss": 0.5474, + "step": 10804 + }, + { + "epoch": 0.77, + "grad_norm": 1.824677566969117, + "learning_rate": 1.3602269370131482e-06, + "loss": 0.4961, + "step": 10805 + }, + { + "epoch": 0.77, + "grad_norm": 2.004135958829725, + "learning_rate": 1.3594391351812191e-06, + "loss": 0.4895, + "step": 10806 + }, + { + "epoch": 0.77, + "grad_norm": 3.5644815147600806, + "learning_rate": 1.3586515256560484e-06, + "loss": 0.5761, + "step": 10807 + }, + { + "epoch": 0.77, + "grad_norm": 1.7672489785111498, + "learning_rate": 1.3578641084792455e-06, + "loss": 0.5223, + "step": 10808 + }, + { + "epoch": 0.77, + "grad_norm": 2.396672796929587, + "learning_rate": 1.3570768836923997e-06, + "loss": 0.4696, + "step": 10809 + }, + { + "epoch": 0.77, + "grad_norm": 1.8328926536843815, + "learning_rate": 1.356289851337097e-06, + "loss": 0.5479, + "step": 10810 + }, + { + "epoch": 0.77, + "grad_norm": 1.9344470374285925, + "learning_rate": 1.3555030114549111e-06, + "loss": 0.5148, + "step": 10811 + }, + { + "epoch": 0.77, + "grad_norm": 1.898740355845096, + "learning_rate": 1.3547163640874055e-06, + "loss": 0.54, + "step": 10812 + }, + { + "epoch": 0.77, + "grad_norm": 1.592347259048833, + "learning_rate": 1.353929909276135e-06, + "loss": 0.5496, + "step": 10813 + }, + { + "epoch": 0.77, + "grad_norm": 1.7115295963858392, + "learning_rate": 1.3531436470626392e-06, + "loss": 0.4501, + "step": 10814 + }, + { + "epoch": 0.77, + "grad_norm": 1.982638465216036, + "learning_rate": 1.3523575774884533e-06, + "loss": 0.5668, + "step": 10815 + }, + { + "epoch": 0.77, + "grad_norm": 1.7137491018533926, + "learning_rate": 1.3515717005951007e-06, + "loss": 0.5923, + "step": 10816 + }, + { + "epoch": 0.77, + "grad_norm": 1.4560434586256394, + "learning_rate": 1.3507860164240926e-06, + "loss": 0.4622, + "step": 10817 + }, + { + "epoch": 0.77, + "grad_norm": 1.6763341786919048, + "learning_rate": 1.350000525016933e-06, + "loss": 0.5224, + "step": 10818 + }, + { + "epoch": 0.77, + "grad_norm": 1.5820061868755284, + "learning_rate": 1.3492152264151132e-06, + "loss": 0.4645, + "step": 10819 + }, + { + "epoch": 0.77, + "grad_norm": 1.5955575530072472, + "learning_rate": 1.3484301206601174e-06, + "loss": 0.5529, + "step": 10820 + }, + { + "epoch": 0.77, + "grad_norm": 1.6539871815222473, + "learning_rate": 1.3476452077934143e-06, + "loss": 0.509, + "step": 10821 + }, + { + "epoch": 0.77, + "grad_norm": 1.7803501078801294, + "learning_rate": 1.3468604878564667e-06, + "loss": 0.5686, + "step": 10822 + }, + { + "epoch": 0.77, + "grad_norm": 1.5839262309868443, + "learning_rate": 1.3460759608907275e-06, + "loss": 0.4696, + "step": 10823 + }, + { + "epoch": 0.77, + "grad_norm": 1.638000104485249, + "learning_rate": 1.3452916269376364e-06, + "loss": 0.5251, + "step": 10824 + }, + { + "epoch": 0.77, + "grad_norm": 1.5937625982921233, + "learning_rate": 1.3445074860386275e-06, + "loss": 0.4964, + "step": 10825 + }, + { + "epoch": 0.77, + "grad_norm": 1.4614454784739406, + "learning_rate": 1.3437235382351166e-06, + "loss": 0.4622, + "step": 10826 + }, + { + "epoch": 0.77, + "grad_norm": 1.472918352339485, + "learning_rate": 1.3429397835685203e-06, + "loss": 0.5298, + "step": 10827 + }, + { + "epoch": 0.77, + "grad_norm": 1.6966648336472956, + "learning_rate": 1.3421562220802353e-06, + "loss": 0.5282, + "step": 10828 + }, + { + "epoch": 0.77, + "grad_norm": 1.4272625988031236, + "learning_rate": 1.3413728538116544e-06, + "loss": 0.477, + "step": 10829 + }, + { + "epoch": 0.77, + "grad_norm": 0.6821881276400853, + "learning_rate": 1.3405896788041533e-06, + "loss": 0.4108, + "step": 10830 + }, + { + "epoch": 0.77, + "grad_norm": 1.8282967541759858, + "learning_rate": 1.339806697099107e-06, + "loss": 0.4943, + "step": 10831 + }, + { + "epoch": 0.77, + "grad_norm": 2.250535763471322, + "learning_rate": 1.3390239087378749e-06, + "loss": 0.4816, + "step": 10832 + }, + { + "epoch": 0.77, + "grad_norm": 1.6265469597184998, + "learning_rate": 1.3382413137618017e-06, + "loss": 0.5432, + "step": 10833 + }, + { + "epoch": 0.77, + "grad_norm": 0.6748725763302437, + "learning_rate": 1.3374589122122334e-06, + "loss": 0.4237, + "step": 10834 + }, + { + "epoch": 0.77, + "grad_norm": 1.6431619625285614, + "learning_rate": 1.336676704130494e-06, + "loss": 0.525, + "step": 10835 + }, + { + "epoch": 0.77, + "grad_norm": 1.7352099551862556, + "learning_rate": 1.3358946895579056e-06, + "loss": 0.559, + "step": 10836 + }, + { + "epoch": 0.77, + "grad_norm": 2.260550393493645, + "learning_rate": 1.3351128685357723e-06, + "loss": 0.5052, + "step": 10837 + }, + { + "epoch": 0.77, + "grad_norm": 1.4441147204245448, + "learning_rate": 1.3343312411053989e-06, + "loss": 0.4801, + "step": 10838 + }, + { + "epoch": 0.77, + "grad_norm": 0.7896090140454196, + "learning_rate": 1.3335498073080688e-06, + "loss": 0.4415, + "step": 10839 + }, + { + "epoch": 0.77, + "grad_norm": 1.8723307585971487, + "learning_rate": 1.332768567185061e-06, + "loss": 0.557, + "step": 10840 + }, + { + "epoch": 0.77, + "grad_norm": 2.2690086799152573, + "learning_rate": 1.3319875207776444e-06, + "loss": 0.4677, + "step": 10841 + }, + { + "epoch": 0.77, + "grad_norm": 1.7306168696864754, + "learning_rate": 1.3312066681270758e-06, + "loss": 0.5228, + "step": 10842 + }, + { + "epoch": 0.77, + "grad_norm": 2.2791162568489716, + "learning_rate": 1.3304260092746034e-06, + "loss": 0.4687, + "step": 10843 + }, + { + "epoch": 0.77, + "grad_norm": 0.6640722135241682, + "learning_rate": 1.3296455442614625e-06, + "loss": 0.4259, + "step": 10844 + }, + { + "epoch": 0.77, + "grad_norm": 1.5376700139075838, + "learning_rate": 1.328865273128881e-06, + "loss": 0.4699, + "step": 10845 + }, + { + "epoch": 0.77, + "grad_norm": 1.9620033433990103, + "learning_rate": 1.3280851959180747e-06, + "loss": 0.5042, + "step": 10846 + }, + { + "epoch": 0.77, + "grad_norm": 1.6584580160662243, + "learning_rate": 1.3273053126702512e-06, + "loss": 0.5036, + "step": 10847 + }, + { + "epoch": 0.77, + "grad_norm": 0.7170176569345356, + "learning_rate": 1.326525623426605e-06, + "loss": 0.4238, + "step": 10848 + }, + { + "epoch": 0.77, + "grad_norm": 1.9149581596712926, + "learning_rate": 1.3257461282283236e-06, + "loss": 0.4951, + "step": 10849 + }, + { + "epoch": 0.77, + "grad_norm": 1.7172657234205344, + "learning_rate": 1.3249668271165827e-06, + "loss": 0.4922, + "step": 10850 + }, + { + "epoch": 0.77, + "grad_norm": 2.1579950177673957, + "learning_rate": 1.3241877201325453e-06, + "loss": 0.4799, + "step": 10851 + }, + { + "epoch": 0.77, + "grad_norm": 1.9550172552017977, + "learning_rate": 1.3234088073173684e-06, + "loss": 0.6122, + "step": 10852 + }, + { + "epoch": 0.77, + "grad_norm": 1.5836425870918611, + "learning_rate": 1.3226300887121956e-06, + "loss": 0.5034, + "step": 10853 + }, + { + "epoch": 0.77, + "grad_norm": 1.7411442061465618, + "learning_rate": 1.3218515643581624e-06, + "loss": 0.4813, + "step": 10854 + }, + { + "epoch": 0.77, + "grad_norm": 1.7273748559302142, + "learning_rate": 1.3210732342963944e-06, + "loss": 0.6032, + "step": 10855 + }, + { + "epoch": 0.77, + "grad_norm": 1.8359622808204898, + "learning_rate": 1.3202950985680013e-06, + "loss": 0.5453, + "step": 10856 + }, + { + "epoch": 0.77, + "grad_norm": 1.8134492969100848, + "learning_rate": 1.3195171572140926e-06, + "loss": 0.5803, + "step": 10857 + }, + { + "epoch": 0.77, + "grad_norm": 1.873054262760071, + "learning_rate": 1.3187394102757567e-06, + "loss": 0.5315, + "step": 10858 + }, + { + "epoch": 0.77, + "grad_norm": 3.608522399650705, + "learning_rate": 1.3179618577940796e-06, + "loss": 0.4858, + "step": 10859 + }, + { + "epoch": 0.77, + "grad_norm": 0.7345848305050079, + "learning_rate": 1.3171844998101329e-06, + "loss": 0.4109, + "step": 10860 + }, + { + "epoch": 0.77, + "grad_norm": 1.9721112041195157, + "learning_rate": 1.316407336364981e-06, + "loss": 0.4802, + "step": 10861 + }, + { + "epoch": 0.77, + "grad_norm": 1.6165099301235486, + "learning_rate": 1.3156303674996763e-06, + "loss": 0.5166, + "step": 10862 + }, + { + "epoch": 0.77, + "grad_norm": 1.7498451984947772, + "learning_rate": 1.314853593255258e-06, + "loss": 0.5243, + "step": 10863 + }, + { + "epoch": 0.77, + "grad_norm": 1.759533510534774, + "learning_rate": 1.3140770136727604e-06, + "loss": 0.4907, + "step": 10864 + }, + { + "epoch": 0.77, + "grad_norm": 1.76117665340654, + "learning_rate": 1.3133006287932048e-06, + "loss": 0.5611, + "step": 10865 + }, + { + "epoch": 0.77, + "grad_norm": 2.6206652410470435, + "learning_rate": 1.312524438657603e-06, + "loss": 0.4935, + "step": 10866 + }, + { + "epoch": 0.77, + "grad_norm": 1.6213199008454326, + "learning_rate": 1.3117484433069528e-06, + "loss": 0.5158, + "step": 10867 + }, + { + "epoch": 0.77, + "grad_norm": 1.6494584252806501, + "learning_rate": 1.31097264278225e-06, + "loss": 0.5687, + "step": 10868 + }, + { + "epoch": 0.77, + "grad_norm": 1.545792484611017, + "learning_rate": 1.310197037124471e-06, + "loss": 0.515, + "step": 10869 + }, + { + "epoch": 0.77, + "grad_norm": 1.5150031808353726, + "learning_rate": 1.3094216263745874e-06, + "loss": 0.4934, + "step": 10870 + }, + { + "epoch": 0.77, + "grad_norm": 1.6938887840230332, + "learning_rate": 1.3086464105735585e-06, + "loss": 0.5022, + "step": 10871 + }, + { + "epoch": 0.77, + "grad_norm": 1.6672224122830244, + "learning_rate": 1.3078713897623346e-06, + "loss": 0.533, + "step": 10872 + }, + { + "epoch": 0.77, + "grad_norm": 1.5924839949429324, + "learning_rate": 1.307096563981856e-06, + "loss": 0.5183, + "step": 10873 + }, + { + "epoch": 0.77, + "grad_norm": 1.6076138539633134, + "learning_rate": 1.3063219332730486e-06, + "loss": 0.5678, + "step": 10874 + }, + { + "epoch": 0.77, + "grad_norm": 1.6844381393107593, + "learning_rate": 1.3055474976768323e-06, + "loss": 0.5204, + "step": 10875 + }, + { + "epoch": 0.77, + "grad_norm": 1.8786530261159826, + "learning_rate": 1.304773257234116e-06, + "loss": 0.5142, + "step": 10876 + }, + { + "epoch": 0.77, + "grad_norm": 1.780473282005095, + "learning_rate": 1.3039992119857974e-06, + "loss": 0.5269, + "step": 10877 + }, + { + "epoch": 0.77, + "grad_norm": 1.628889769970003, + "learning_rate": 1.3032253619727653e-06, + "loss": 0.5852, + "step": 10878 + }, + { + "epoch": 0.77, + "grad_norm": 0.6635681414229859, + "learning_rate": 1.3024517072358932e-06, + "loss": 0.4132, + "step": 10879 + }, + { + "epoch": 0.77, + "grad_norm": 1.8671274388850376, + "learning_rate": 1.3016782478160546e-06, + "loss": 0.5643, + "step": 10880 + }, + { + "epoch": 0.77, + "grad_norm": 1.4677589370330062, + "learning_rate": 1.3009049837541e-06, + "loss": 0.4643, + "step": 10881 + }, + { + "epoch": 0.77, + "grad_norm": 1.6096491344110326, + "learning_rate": 1.3001319150908787e-06, + "loss": 0.4846, + "step": 10882 + }, + { + "epoch": 0.77, + "grad_norm": 1.5987129552508992, + "learning_rate": 1.299359041867227e-06, + "loss": 0.5089, + "step": 10883 + }, + { + "epoch": 0.77, + "grad_norm": 1.4806032435840462, + "learning_rate": 1.2985863641239699e-06, + "loss": 0.4941, + "step": 10884 + }, + { + "epoch": 0.77, + "grad_norm": 2.733318059857817, + "learning_rate": 1.2978138819019242e-06, + "loss": 0.5224, + "step": 10885 + }, + { + "epoch": 0.77, + "grad_norm": 1.589178650142607, + "learning_rate": 1.2970415952418913e-06, + "loss": 0.5277, + "step": 10886 + }, + { + "epoch": 0.77, + "grad_norm": 1.5974596521741362, + "learning_rate": 1.2962695041846717e-06, + "loss": 0.4663, + "step": 10887 + }, + { + "epoch": 0.77, + "grad_norm": 1.5829995520229314, + "learning_rate": 1.2954976087710458e-06, + "loss": 0.5461, + "step": 10888 + }, + { + "epoch": 0.77, + "grad_norm": 1.6826849810476183, + "learning_rate": 1.2947259090417879e-06, + "loss": 0.4897, + "step": 10889 + }, + { + "epoch": 0.77, + "grad_norm": 1.5241364430488633, + "learning_rate": 1.2939544050376634e-06, + "loss": 0.4947, + "step": 10890 + }, + { + "epoch": 0.77, + "grad_norm": 1.6331117431285345, + "learning_rate": 1.2931830967994247e-06, + "loss": 0.5327, + "step": 10891 + }, + { + "epoch": 0.77, + "grad_norm": 2.213586049037629, + "learning_rate": 1.2924119843678168e-06, + "loss": 0.503, + "step": 10892 + }, + { + "epoch": 0.77, + "grad_norm": 1.6095104738278714, + "learning_rate": 1.2916410677835695e-06, + "loss": 0.5569, + "step": 10893 + }, + { + "epoch": 0.77, + "grad_norm": 1.764558426687655, + "learning_rate": 1.2908703470874063e-06, + "loss": 0.5549, + "step": 10894 + }, + { + "epoch": 0.77, + "grad_norm": 1.7652998150375065, + "learning_rate": 1.29009982232004e-06, + "loss": 0.5218, + "step": 10895 + }, + { + "epoch": 0.77, + "grad_norm": 1.9896737676938796, + "learning_rate": 1.2893294935221734e-06, + "loss": 0.4888, + "step": 10896 + }, + { + "epoch": 0.77, + "grad_norm": 1.73947015601203, + "learning_rate": 1.2885593607344932e-06, + "loss": 0.4929, + "step": 10897 + }, + { + "epoch": 0.77, + "grad_norm": 2.1017269643989707, + "learning_rate": 1.2877894239976852e-06, + "loss": 0.5537, + "step": 10898 + }, + { + "epoch": 0.77, + "grad_norm": 1.9744171509484898, + "learning_rate": 1.2870196833524202e-06, + "loss": 0.5063, + "step": 10899 + }, + { + "epoch": 0.77, + "grad_norm": 1.6034252855920001, + "learning_rate": 1.286250138839355e-06, + "loss": 0.5488, + "step": 10900 + }, + { + "epoch": 0.77, + "grad_norm": 1.4709446157678105, + "learning_rate": 1.2854807904991413e-06, + "loss": 0.4567, + "step": 10901 + }, + { + "epoch": 0.77, + "grad_norm": 1.9340872316312068, + "learning_rate": 1.284711638372419e-06, + "loss": 0.5135, + "step": 10902 + }, + { + "epoch": 0.77, + "grad_norm": 1.4863720974327204, + "learning_rate": 1.2839426824998186e-06, + "loss": 0.5075, + "step": 10903 + }, + { + "epoch": 0.77, + "grad_norm": 1.661969280381403, + "learning_rate": 1.2831739229219559e-06, + "loss": 0.5403, + "step": 10904 + }, + { + "epoch": 0.77, + "grad_norm": 0.7470434211525666, + "learning_rate": 1.2824053596794412e-06, + "loss": 0.4078, + "step": 10905 + }, + { + "epoch": 0.77, + "grad_norm": 1.7095089273979542, + "learning_rate": 1.281636992812872e-06, + "loss": 0.551, + "step": 10906 + }, + { + "epoch": 0.77, + "grad_norm": 1.8277512043405852, + "learning_rate": 1.2808688223628362e-06, + "loss": 0.5072, + "step": 10907 + }, + { + "epoch": 0.77, + "grad_norm": 1.5660011362295392, + "learning_rate": 1.280100848369913e-06, + "loss": 0.4696, + "step": 10908 + }, + { + "epoch": 0.77, + "grad_norm": 1.7260660750113028, + "learning_rate": 1.279333070874665e-06, + "loss": 0.5206, + "step": 10909 + }, + { + "epoch": 0.77, + "grad_norm": 1.8580107217008046, + "learning_rate": 1.278565489917654e-06, + "loss": 0.5062, + "step": 10910 + }, + { + "epoch": 0.77, + "grad_norm": 2.090537444035423, + "learning_rate": 1.2777981055394228e-06, + "loss": 0.5537, + "step": 10911 + }, + { + "epoch": 0.77, + "grad_norm": 1.8390883058838106, + "learning_rate": 1.2770309177805079e-06, + "loss": 0.4766, + "step": 10912 + }, + { + "epoch": 0.77, + "grad_norm": 1.5258573207786446, + "learning_rate": 1.2762639266814352e-06, + "loss": 0.4744, + "step": 10913 + }, + { + "epoch": 0.77, + "grad_norm": 2.118609343832253, + "learning_rate": 1.2754971322827198e-06, + "loss": 0.5708, + "step": 10914 + }, + { + "epoch": 0.77, + "grad_norm": 1.637177550612563, + "learning_rate": 1.2747305346248673e-06, + "loss": 0.5297, + "step": 10915 + }, + { + "epoch": 0.77, + "grad_norm": 1.565659377173418, + "learning_rate": 1.2739641337483682e-06, + "loss": 0.4914, + "step": 10916 + }, + { + "epoch": 0.77, + "grad_norm": 0.7300313374075204, + "learning_rate": 1.2731979296937119e-06, + "loss": 0.4321, + "step": 10917 + }, + { + "epoch": 0.77, + "grad_norm": 1.7013292738967674, + "learning_rate": 1.2724319225013675e-06, + "loss": 0.5365, + "step": 10918 + }, + { + "epoch": 0.77, + "grad_norm": 1.6586119382020115, + "learning_rate": 1.2716661122118e-06, + "loss": 0.53, + "step": 10919 + }, + { + "epoch": 0.77, + "grad_norm": 0.720656146042525, + "learning_rate": 1.2709004988654617e-06, + "loss": 0.4217, + "step": 10920 + }, + { + "epoch": 0.77, + "grad_norm": 1.9288341502543607, + "learning_rate": 1.2701350825027946e-06, + "loss": 0.4858, + "step": 10921 + }, + { + "epoch": 0.78, + "grad_norm": 1.9361374663790252, + "learning_rate": 1.2693698631642333e-06, + "loss": 0.634, + "step": 10922 + }, + { + "epoch": 0.78, + "grad_norm": 2.3860815941462477, + "learning_rate": 1.2686048408901946e-06, + "loss": 0.5376, + "step": 10923 + }, + { + "epoch": 0.78, + "grad_norm": 1.6787751125163501, + "learning_rate": 1.2678400157210924e-06, + "loss": 0.517, + "step": 10924 + }, + { + "epoch": 0.78, + "grad_norm": 1.7674574770848104, + "learning_rate": 1.2670753876973263e-06, + "loss": 0.5089, + "step": 10925 + }, + { + "epoch": 0.78, + "grad_norm": 2.2337662604144652, + "learning_rate": 1.2663109568592874e-06, + "loss": 0.4933, + "step": 10926 + }, + { + "epoch": 0.78, + "grad_norm": 1.6985228572491242, + "learning_rate": 1.2655467232473567e-06, + "loss": 0.4558, + "step": 10927 + }, + { + "epoch": 0.78, + "grad_norm": 1.5703144330172456, + "learning_rate": 1.2647826869018991e-06, + "loss": 0.4567, + "step": 10928 + }, + { + "epoch": 0.78, + "grad_norm": 1.8103053629105468, + "learning_rate": 1.2640188478632798e-06, + "loss": 0.5373, + "step": 10929 + }, + { + "epoch": 0.78, + "grad_norm": 1.5867033004971172, + "learning_rate": 1.2632552061718428e-06, + "loss": 0.5389, + "step": 10930 + }, + { + "epoch": 0.78, + "grad_norm": 1.670952411694668, + "learning_rate": 1.2624917618679272e-06, + "loss": 0.5978, + "step": 10931 + }, + { + "epoch": 0.78, + "grad_norm": 1.7404344452819018, + "learning_rate": 1.2617285149918612e-06, + "loss": 0.5095, + "step": 10932 + }, + { + "epoch": 0.78, + "grad_norm": 2.0390939570436766, + "learning_rate": 1.2609654655839636e-06, + "loss": 0.5067, + "step": 10933 + }, + { + "epoch": 0.78, + "grad_norm": 1.7684965824315106, + "learning_rate": 1.2602026136845386e-06, + "loss": 0.5585, + "step": 10934 + }, + { + "epoch": 0.78, + "grad_norm": 1.7626715308845304, + "learning_rate": 1.259439959333882e-06, + "loss": 0.4445, + "step": 10935 + }, + { + "epoch": 0.78, + "grad_norm": 1.5751333808817984, + "learning_rate": 1.258677502572284e-06, + "loss": 0.4912, + "step": 10936 + }, + { + "epoch": 0.78, + "grad_norm": 0.7485465937847902, + "learning_rate": 1.2579152434400167e-06, + "loss": 0.4224, + "step": 10937 + }, + { + "epoch": 0.78, + "grad_norm": 1.5862343480983825, + "learning_rate": 1.2571531819773474e-06, + "loss": 0.4975, + "step": 10938 + }, + { + "epoch": 0.78, + "grad_norm": 1.8662947066915405, + "learning_rate": 1.2563913182245268e-06, + "loss": 0.4962, + "step": 10939 + }, + { + "epoch": 0.78, + "grad_norm": 1.6056648932982254, + "learning_rate": 1.2556296522218048e-06, + "loss": 0.5353, + "step": 10940 + }, + { + "epoch": 0.78, + "grad_norm": 1.957946890623682, + "learning_rate": 1.2548681840094102e-06, + "loss": 0.5486, + "step": 10941 + }, + { + "epoch": 0.78, + "grad_norm": 1.778359055854306, + "learning_rate": 1.254106913627569e-06, + "loss": 0.4731, + "step": 10942 + }, + { + "epoch": 0.78, + "grad_norm": 1.6516141999403011, + "learning_rate": 1.2533458411164927e-06, + "loss": 0.484, + "step": 10943 + }, + { + "epoch": 0.78, + "grad_norm": 2.385155025537929, + "learning_rate": 1.2525849665163853e-06, + "loss": 0.5328, + "step": 10944 + }, + { + "epoch": 0.78, + "grad_norm": 1.5913787534575845, + "learning_rate": 1.2518242898674388e-06, + "loss": 0.5638, + "step": 10945 + }, + { + "epoch": 0.78, + "grad_norm": 1.6377100515917407, + "learning_rate": 1.2510638112098318e-06, + "loss": 0.4728, + "step": 10946 + }, + { + "epoch": 0.78, + "grad_norm": 1.8594719833180613, + "learning_rate": 1.2503035305837391e-06, + "loss": 0.5721, + "step": 10947 + }, + { + "epoch": 0.78, + "grad_norm": 1.817810557259487, + "learning_rate": 1.249543448029319e-06, + "loss": 0.5381, + "step": 10948 + }, + { + "epoch": 0.78, + "grad_norm": 1.7363955972245342, + "learning_rate": 1.2487835635867224e-06, + "loss": 0.4544, + "step": 10949 + }, + { + "epoch": 0.78, + "grad_norm": 1.4928895333967576, + "learning_rate": 1.2480238772960885e-06, + "loss": 0.4756, + "step": 10950 + }, + { + "epoch": 0.78, + "grad_norm": 1.8761309484910431, + "learning_rate": 1.2472643891975467e-06, + "loss": 0.5422, + "step": 10951 + }, + { + "epoch": 0.78, + "grad_norm": 1.6560713003289882, + "learning_rate": 1.2465050993312172e-06, + "loss": 0.5015, + "step": 10952 + }, + { + "epoch": 0.78, + "grad_norm": 2.264307497425222, + "learning_rate": 1.2457460077372057e-06, + "loss": 0.561, + "step": 10953 + }, + { + "epoch": 0.78, + "grad_norm": 1.639747596662213, + "learning_rate": 1.2449871144556114e-06, + "loss": 0.4399, + "step": 10954 + }, + { + "epoch": 0.78, + "grad_norm": 1.964402197714277, + "learning_rate": 1.2442284195265213e-06, + "loss": 0.5528, + "step": 10955 + }, + { + "epoch": 0.78, + "grad_norm": 1.9106746159107661, + "learning_rate": 1.243469922990012e-06, + "loss": 0.5506, + "step": 10956 + }, + { + "epoch": 0.78, + "grad_norm": 1.860330424706358, + "learning_rate": 1.2427116248861521e-06, + "loss": 0.5317, + "step": 10957 + }, + { + "epoch": 0.78, + "grad_norm": 1.6511085444033526, + "learning_rate": 1.2419535252549925e-06, + "loss": 0.5474, + "step": 10958 + }, + { + "epoch": 0.78, + "grad_norm": 1.6171166731318076, + "learning_rate": 1.2411956241365846e-06, + "loss": 0.4332, + "step": 10959 + }, + { + "epoch": 0.78, + "grad_norm": 1.9527818379880324, + "learning_rate": 1.2404379215709595e-06, + "loss": 0.5211, + "step": 10960 + }, + { + "epoch": 0.78, + "grad_norm": 1.7909853473487907, + "learning_rate": 1.239680417598142e-06, + "loss": 0.5122, + "step": 10961 + }, + { + "epoch": 0.78, + "grad_norm": 1.6705765684652034, + "learning_rate": 1.238923112258147e-06, + "loss": 0.5577, + "step": 10962 + }, + { + "epoch": 0.78, + "grad_norm": 1.59827989968891, + "learning_rate": 1.2381660055909767e-06, + "loss": 0.5347, + "step": 10963 + }, + { + "epoch": 0.78, + "grad_norm": 1.6932388352388064, + "learning_rate": 1.2374090976366264e-06, + "loss": 0.4934, + "step": 10964 + }, + { + "epoch": 0.78, + "grad_norm": 1.5892702090688726, + "learning_rate": 1.2366523884350745e-06, + "loss": 0.5346, + "step": 10965 + }, + { + "epoch": 0.78, + "grad_norm": 1.6500639183074481, + "learning_rate": 1.235895878026298e-06, + "loss": 0.5109, + "step": 10966 + }, + { + "epoch": 0.78, + "grad_norm": 1.7288294292301885, + "learning_rate": 1.2351395664502548e-06, + "loss": 0.4571, + "step": 10967 + }, + { + "epoch": 0.78, + "grad_norm": 0.6763036019456693, + "learning_rate": 1.234383453746898e-06, + "loss": 0.416, + "step": 10968 + }, + { + "epoch": 0.78, + "grad_norm": 1.6328431926850808, + "learning_rate": 1.2336275399561636e-06, + "loss": 0.5477, + "step": 10969 + }, + { + "epoch": 0.78, + "grad_norm": 1.9745137245006725, + "learning_rate": 1.2328718251179877e-06, + "loss": 0.5475, + "step": 10970 + }, + { + "epoch": 0.78, + "grad_norm": 2.472301979203551, + "learning_rate": 1.2321163092722855e-06, + "loss": 0.5307, + "step": 10971 + }, + { + "epoch": 0.78, + "grad_norm": 1.644615842700015, + "learning_rate": 1.2313609924589675e-06, + "loss": 0.4648, + "step": 10972 + }, + { + "epoch": 0.78, + "grad_norm": 1.807021630265791, + "learning_rate": 1.2306058747179312e-06, + "loss": 0.4956, + "step": 10973 + }, + { + "epoch": 0.78, + "grad_norm": 1.7424712562772797, + "learning_rate": 1.2298509560890653e-06, + "loss": 0.468, + "step": 10974 + }, + { + "epoch": 0.78, + "grad_norm": 1.8247576444375855, + "learning_rate": 1.2290962366122482e-06, + "loss": 0.5743, + "step": 10975 + }, + { + "epoch": 0.78, + "grad_norm": 1.6459636194229441, + "learning_rate": 1.2283417163273425e-06, + "loss": 0.5103, + "step": 10976 + }, + { + "epoch": 0.78, + "grad_norm": 1.6763732738088475, + "learning_rate": 1.2275873952742106e-06, + "loss": 0.5284, + "step": 10977 + }, + { + "epoch": 0.78, + "grad_norm": 1.9541266323491837, + "learning_rate": 1.226833273492694e-06, + "loss": 0.5853, + "step": 10978 + }, + { + "epoch": 0.78, + "grad_norm": 2.071068654143779, + "learning_rate": 1.2260793510226289e-06, + "loss": 0.6004, + "step": 10979 + }, + { + "epoch": 0.78, + "grad_norm": 1.9772278175674907, + "learning_rate": 1.22532562790384e-06, + "loss": 0.4984, + "step": 10980 + }, + { + "epoch": 0.78, + "grad_norm": 1.604690224230873, + "learning_rate": 1.2245721041761417e-06, + "loss": 0.5191, + "step": 10981 + }, + { + "epoch": 0.78, + "grad_norm": 1.9135204332283975, + "learning_rate": 1.2238187798793393e-06, + "loss": 0.4895, + "step": 10982 + }, + { + "epoch": 0.78, + "grad_norm": 1.5643918593884518, + "learning_rate": 1.2230656550532233e-06, + "loss": 0.5268, + "step": 10983 + }, + { + "epoch": 0.78, + "grad_norm": 2.3586472881216167, + "learning_rate": 1.222312729737577e-06, + "loss": 0.5818, + "step": 10984 + }, + { + "epoch": 0.78, + "grad_norm": 2.018657263132681, + "learning_rate": 1.2215600039721731e-06, + "loss": 0.5103, + "step": 10985 + }, + { + "epoch": 0.78, + "grad_norm": 2.3797141593182243, + "learning_rate": 1.2208074777967731e-06, + "loss": 0.5932, + "step": 10986 + }, + { + "epoch": 0.78, + "grad_norm": 4.498677460649487, + "learning_rate": 1.2200551512511288e-06, + "loss": 0.4832, + "step": 10987 + }, + { + "epoch": 0.78, + "grad_norm": 1.708014649105612, + "learning_rate": 1.2193030243749771e-06, + "loss": 0.4929, + "step": 10988 + }, + { + "epoch": 0.78, + "grad_norm": 2.2853829723086583, + "learning_rate": 1.2185510972080527e-06, + "loss": 0.5301, + "step": 10989 + }, + { + "epoch": 0.78, + "grad_norm": 1.6260146990716302, + "learning_rate": 1.217799369790072e-06, + "loss": 0.5284, + "step": 10990 + }, + { + "epoch": 0.78, + "grad_norm": 0.7216031665940201, + "learning_rate": 1.217047842160744e-06, + "loss": 0.4344, + "step": 10991 + }, + { + "epoch": 0.78, + "grad_norm": 1.8084276467837974, + "learning_rate": 1.216296514359767e-06, + "loss": 0.5077, + "step": 10992 + }, + { + "epoch": 0.78, + "grad_norm": 1.7280216784861346, + "learning_rate": 1.2155453864268296e-06, + "loss": 0.5521, + "step": 10993 + }, + { + "epoch": 0.78, + "grad_norm": 1.7761296895490348, + "learning_rate": 1.2147944584016097e-06, + "loss": 0.5619, + "step": 10994 + }, + { + "epoch": 0.78, + "grad_norm": 1.6934806052539673, + "learning_rate": 1.2140437303237696e-06, + "loss": 0.5786, + "step": 10995 + }, + { + "epoch": 0.78, + "grad_norm": 0.7103871468257379, + "learning_rate": 1.2132932022329707e-06, + "loss": 0.4186, + "step": 10996 + }, + { + "epoch": 0.78, + "grad_norm": 1.8452055767860907, + "learning_rate": 1.2125428741688549e-06, + "loss": 0.5208, + "step": 10997 + }, + { + "epoch": 0.78, + "grad_norm": 1.921722611938055, + "learning_rate": 1.21179274617106e-06, + "loss": 0.4652, + "step": 10998 + }, + { + "epoch": 0.78, + "grad_norm": 1.9065178563011886, + "learning_rate": 1.211042818279205e-06, + "loss": 0.5283, + "step": 10999 + }, + { + "epoch": 0.78, + "grad_norm": 1.8959396608879782, + "learning_rate": 1.2102930905329101e-06, + "loss": 0.5924, + "step": 11000 + }, + { + "epoch": 0.78, + "grad_norm": 1.701487420256794, + "learning_rate": 1.2095435629717739e-06, + "loss": 0.5692, + "step": 11001 + }, + { + "epoch": 0.78, + "grad_norm": 1.5104060329745876, + "learning_rate": 1.208794235635391e-06, + "loss": 0.4782, + "step": 11002 + }, + { + "epoch": 0.78, + "grad_norm": 1.4737803083244958, + "learning_rate": 1.2080451085633426e-06, + "loss": 0.4538, + "step": 11003 + }, + { + "epoch": 0.78, + "grad_norm": 1.7539798377374956, + "learning_rate": 1.2072961817952005e-06, + "loss": 0.4488, + "step": 11004 + }, + { + "epoch": 0.78, + "grad_norm": 1.9284126154254553, + "learning_rate": 1.206547455370527e-06, + "loss": 0.5156, + "step": 11005 + }, + { + "epoch": 0.78, + "grad_norm": 1.8075635304328046, + "learning_rate": 1.205798929328869e-06, + "loss": 0.5144, + "step": 11006 + }, + { + "epoch": 0.78, + "grad_norm": 1.824095700367255, + "learning_rate": 1.2050506037097687e-06, + "loss": 0.5491, + "step": 11007 + }, + { + "epoch": 0.78, + "grad_norm": 1.953721775757005, + "learning_rate": 1.2043024785527542e-06, + "loss": 0.5383, + "step": 11008 + }, + { + "epoch": 0.78, + "grad_norm": 0.6471240243110402, + "learning_rate": 1.2035545538973442e-06, + "loss": 0.4208, + "step": 11009 + }, + { + "epoch": 0.78, + "grad_norm": 1.4729591148435044, + "learning_rate": 1.202806829783047e-06, + "loss": 0.447, + "step": 11010 + }, + { + "epoch": 0.78, + "grad_norm": 1.8555601159808282, + "learning_rate": 1.2020593062493596e-06, + "loss": 0.5291, + "step": 11011 + }, + { + "epoch": 0.78, + "grad_norm": 2.1363934261708106, + "learning_rate": 1.2013119833357706e-06, + "loss": 0.4998, + "step": 11012 + }, + { + "epoch": 0.78, + "grad_norm": 0.7025609295265299, + "learning_rate": 1.2005648610817528e-06, + "loss": 0.4191, + "step": 11013 + }, + { + "epoch": 0.78, + "grad_norm": 1.527027904313943, + "learning_rate": 1.1998179395267729e-06, + "loss": 0.481, + "step": 11014 + }, + { + "epoch": 0.78, + "grad_norm": 1.6594806284918397, + "learning_rate": 1.1990712187102865e-06, + "loss": 0.48, + "step": 11015 + }, + { + "epoch": 0.78, + "grad_norm": 1.793571414082666, + "learning_rate": 1.1983246986717378e-06, + "loss": 0.4762, + "step": 11016 + }, + { + "epoch": 0.78, + "grad_norm": 2.3243059541804163, + "learning_rate": 1.1975783794505614e-06, + "loss": 0.4944, + "step": 11017 + }, + { + "epoch": 0.78, + "grad_norm": 1.4591267966253827, + "learning_rate": 1.1968322610861767e-06, + "loss": 0.5156, + "step": 11018 + }, + { + "epoch": 0.78, + "grad_norm": 1.793487527401563, + "learning_rate": 1.1960863436180016e-06, + "loss": 0.5755, + "step": 11019 + }, + { + "epoch": 0.78, + "grad_norm": 0.7824704432162839, + "learning_rate": 1.195340627085434e-06, + "loss": 0.4163, + "step": 11020 + }, + { + "epoch": 0.78, + "grad_norm": 2.282166863818371, + "learning_rate": 1.1945951115278664e-06, + "loss": 0.5762, + "step": 11021 + }, + { + "epoch": 0.78, + "grad_norm": 1.690058224417182, + "learning_rate": 1.1938497969846795e-06, + "loss": 0.5664, + "step": 11022 + }, + { + "epoch": 0.78, + "grad_norm": 1.6081645291220275, + "learning_rate": 1.193104683495243e-06, + "loss": 0.5357, + "step": 11023 + }, + { + "epoch": 0.78, + "grad_norm": 1.7060383066416038, + "learning_rate": 1.1923597710989183e-06, + "loss": 0.547, + "step": 11024 + }, + { + "epoch": 0.78, + "grad_norm": 2.5144036571345776, + "learning_rate": 1.1916150598350496e-06, + "loss": 0.5269, + "step": 11025 + }, + { + "epoch": 0.78, + "grad_norm": 1.6275865287745257, + "learning_rate": 1.1908705497429812e-06, + "loss": 0.4949, + "step": 11026 + }, + { + "epoch": 0.78, + "grad_norm": 1.5939634210379698, + "learning_rate": 1.1901262408620357e-06, + "loss": 0.4834, + "step": 11027 + }, + { + "epoch": 0.78, + "grad_norm": 1.6486298453755994, + "learning_rate": 1.1893821332315336e-06, + "loss": 0.5233, + "step": 11028 + }, + { + "epoch": 0.78, + "grad_norm": 1.6391315217765428, + "learning_rate": 1.188638226890776e-06, + "loss": 0.5493, + "step": 11029 + }, + { + "epoch": 0.78, + "grad_norm": 0.6883404143738218, + "learning_rate": 1.1878945218790633e-06, + "loss": 0.4311, + "step": 11030 + }, + { + "epoch": 0.78, + "grad_norm": 1.6802829877219045, + "learning_rate": 1.187151018235681e-06, + "loss": 0.5959, + "step": 11031 + }, + { + "epoch": 0.78, + "grad_norm": 1.899265892332404, + "learning_rate": 1.1864077159998999e-06, + "loss": 0.5883, + "step": 11032 + }, + { + "epoch": 0.78, + "grad_norm": 1.5415573676291532, + "learning_rate": 1.1856646152109857e-06, + "loss": 0.5274, + "step": 11033 + }, + { + "epoch": 0.78, + "grad_norm": 1.6883672803281489, + "learning_rate": 1.184921715908191e-06, + "loss": 0.484, + "step": 11034 + }, + { + "epoch": 0.78, + "grad_norm": 1.8394521423042018, + "learning_rate": 1.18417901813076e-06, + "loss": 0.5057, + "step": 11035 + }, + { + "epoch": 0.78, + "grad_norm": 2.026076084241232, + "learning_rate": 1.1834365219179218e-06, + "loss": 0.5591, + "step": 11036 + }, + { + "epoch": 0.78, + "grad_norm": 1.5282277016145205, + "learning_rate": 1.1826942273088981e-06, + "loss": 0.4942, + "step": 11037 + }, + { + "epoch": 0.78, + "grad_norm": 1.8278155242431713, + "learning_rate": 1.1819521343429008e-06, + "loss": 0.4886, + "step": 11038 + }, + { + "epoch": 0.78, + "grad_norm": 1.5119151294561946, + "learning_rate": 1.1812102430591288e-06, + "loss": 0.5456, + "step": 11039 + }, + { + "epoch": 0.78, + "grad_norm": 1.6578488826590867, + "learning_rate": 1.1804685534967735e-06, + "loss": 0.5059, + "step": 11040 + }, + { + "epoch": 0.78, + "grad_norm": 0.6851913239115897, + "learning_rate": 1.1797270656950083e-06, + "loss": 0.3928, + "step": 11041 + }, + { + "epoch": 0.78, + "grad_norm": 1.7771646111658195, + "learning_rate": 1.178985779693007e-06, + "loss": 0.4892, + "step": 11042 + }, + { + "epoch": 0.78, + "grad_norm": 2.277006845806584, + "learning_rate": 1.1782446955299231e-06, + "loss": 0.5242, + "step": 11043 + }, + { + "epoch": 0.78, + "grad_norm": 2.228147404969636, + "learning_rate": 1.177503813244904e-06, + "loss": 0.5196, + "step": 11044 + }, + { + "epoch": 0.78, + "grad_norm": 4.294776651948943, + "learning_rate": 1.1767631328770863e-06, + "loss": 0.5653, + "step": 11045 + }, + { + "epoch": 0.78, + "grad_norm": 0.7817257516735312, + "learning_rate": 1.176022654465594e-06, + "loss": 0.4043, + "step": 11046 + }, + { + "epoch": 0.78, + "grad_norm": 1.7191469025325101, + "learning_rate": 1.1752823780495449e-06, + "loss": 0.4797, + "step": 11047 + }, + { + "epoch": 0.78, + "grad_norm": 1.482447481265252, + "learning_rate": 1.174542303668037e-06, + "loss": 0.4362, + "step": 11048 + }, + { + "epoch": 0.78, + "grad_norm": 1.629180682672729, + "learning_rate": 1.1738024313601698e-06, + "loss": 0.4799, + "step": 11049 + }, + { + "epoch": 0.78, + "grad_norm": 1.6457025525595588, + "learning_rate": 1.1730627611650219e-06, + "loss": 0.5115, + "step": 11050 + }, + { + "epoch": 0.78, + "grad_norm": 7.156821153458225, + "learning_rate": 1.1723232931216666e-06, + "loss": 0.5335, + "step": 11051 + }, + { + "epoch": 0.78, + "grad_norm": 1.647359602191678, + "learning_rate": 1.1715840272691647e-06, + "loss": 0.5026, + "step": 11052 + }, + { + "epoch": 0.78, + "grad_norm": 1.6032113622712225, + "learning_rate": 1.170844963646567e-06, + "loss": 0.4857, + "step": 11053 + }, + { + "epoch": 0.78, + "grad_norm": 1.854832090505108, + "learning_rate": 1.1701061022929145e-06, + "loss": 0.5212, + "step": 11054 + }, + { + "epoch": 0.78, + "grad_norm": 1.8580915641460534, + "learning_rate": 1.169367443247234e-06, + "loss": 0.4424, + "step": 11055 + }, + { + "epoch": 0.78, + "grad_norm": 0.7374975733355197, + "learning_rate": 1.1686289865485451e-06, + "loss": 0.4272, + "step": 11056 + }, + { + "epoch": 0.78, + "grad_norm": 1.849392665004187, + "learning_rate": 1.1678907322358552e-06, + "loss": 0.543, + "step": 11057 + }, + { + "epoch": 0.78, + "grad_norm": 1.6697567788169236, + "learning_rate": 1.167152680348162e-06, + "loss": 0.5214, + "step": 11058 + }, + { + "epoch": 0.78, + "grad_norm": 1.5385731450986397, + "learning_rate": 1.1664148309244515e-06, + "loss": 0.4641, + "step": 11059 + }, + { + "epoch": 0.78, + "grad_norm": 1.8216632432992292, + "learning_rate": 1.1656771840037002e-06, + "loss": 0.5538, + "step": 11060 + }, + { + "epoch": 0.78, + "grad_norm": 1.6809404848911063, + "learning_rate": 1.1649397396248735e-06, + "loss": 0.5709, + "step": 11061 + }, + { + "epoch": 0.78, + "grad_norm": 1.864258149103781, + "learning_rate": 1.1642024978269234e-06, + "loss": 0.5259, + "step": 11062 + }, + { + "epoch": 0.79, + "grad_norm": 2.424047123756508, + "learning_rate": 1.1634654586487949e-06, + "loss": 0.5322, + "step": 11063 + }, + { + "epoch": 0.79, + "grad_norm": 1.5703400590444618, + "learning_rate": 1.1627286221294205e-06, + "loss": 0.5264, + "step": 11064 + }, + { + "epoch": 0.79, + "grad_norm": 1.7229052354129164, + "learning_rate": 1.1619919883077241e-06, + "loss": 0.5342, + "step": 11065 + }, + { + "epoch": 0.79, + "grad_norm": 0.6771223248527087, + "learning_rate": 1.1612555572226147e-06, + "loss": 0.4179, + "step": 11066 + }, + { + "epoch": 0.79, + "grad_norm": 0.7724598926354093, + "learning_rate": 1.1605193289129928e-06, + "loss": 0.4029, + "step": 11067 + }, + { + "epoch": 0.79, + "grad_norm": 1.818533303875245, + "learning_rate": 1.1597833034177524e-06, + "loss": 0.5794, + "step": 11068 + }, + { + "epoch": 0.79, + "grad_norm": 1.7394223814700076, + "learning_rate": 1.159047480775769e-06, + "loss": 0.512, + "step": 11069 + }, + { + "epoch": 0.79, + "grad_norm": 0.6692522679870658, + "learning_rate": 1.1583118610259143e-06, + "loss": 0.4036, + "step": 11070 + }, + { + "epoch": 0.79, + "grad_norm": 1.7233215265793131, + "learning_rate": 1.1575764442070414e-06, + "loss": 0.4969, + "step": 11071 + }, + { + "epoch": 0.79, + "grad_norm": 1.8570213267342284, + "learning_rate": 1.156841230358004e-06, + "loss": 0.5437, + "step": 11072 + }, + { + "epoch": 0.79, + "grad_norm": 1.6494940728613363, + "learning_rate": 1.156106219517633e-06, + "loss": 0.487, + "step": 11073 + }, + { + "epoch": 0.79, + "grad_norm": 0.7159297859726088, + "learning_rate": 1.1553714117247571e-06, + "loss": 0.432, + "step": 11074 + }, + { + "epoch": 0.79, + "grad_norm": 1.5863115733199185, + "learning_rate": 1.1546368070181902e-06, + "loss": 0.5138, + "step": 11075 + }, + { + "epoch": 0.79, + "grad_norm": 2.9631153218443793, + "learning_rate": 1.153902405436737e-06, + "loss": 0.5429, + "step": 11076 + }, + { + "epoch": 0.79, + "grad_norm": 0.6628920954687847, + "learning_rate": 1.1531682070191925e-06, + "loss": 0.4462, + "step": 11077 + }, + { + "epoch": 0.79, + "grad_norm": 1.7115840402996398, + "learning_rate": 1.1524342118043358e-06, + "loss": 0.4864, + "step": 11078 + }, + { + "epoch": 0.79, + "grad_norm": 2.814729795146018, + "learning_rate": 1.1517004198309434e-06, + "loss": 0.5358, + "step": 11079 + }, + { + "epoch": 0.79, + "grad_norm": 1.8379887977149012, + "learning_rate": 1.1509668311377736e-06, + "loss": 0.4652, + "step": 11080 + }, + { + "epoch": 0.79, + "grad_norm": 1.8053041270071903, + "learning_rate": 1.1502334457635783e-06, + "loss": 0.5184, + "step": 11081 + }, + { + "epoch": 0.79, + "grad_norm": 1.6243944752636452, + "learning_rate": 1.149500263747097e-06, + "loss": 0.5039, + "step": 11082 + }, + { + "epoch": 0.79, + "grad_norm": 1.8840037575938509, + "learning_rate": 1.1487672851270593e-06, + "loss": 0.4768, + "step": 11083 + }, + { + "epoch": 0.79, + "grad_norm": 1.612842232359914, + "learning_rate": 1.1480345099421846e-06, + "loss": 0.5254, + "step": 11084 + }, + { + "epoch": 0.79, + "grad_norm": 1.8533041491280224, + "learning_rate": 1.1473019382311778e-06, + "loss": 0.4788, + "step": 11085 + }, + { + "epoch": 0.79, + "grad_norm": 1.8123912644057865, + "learning_rate": 1.1465695700327373e-06, + "loss": 0.497, + "step": 11086 + }, + { + "epoch": 0.79, + "grad_norm": 1.7963399967020914, + "learning_rate": 1.1458374053855492e-06, + "loss": 0.5186, + "step": 11087 + }, + { + "epoch": 0.79, + "grad_norm": 1.7671215129009903, + "learning_rate": 1.1451054443282893e-06, + "loss": 0.5183, + "step": 11088 + }, + { + "epoch": 0.79, + "grad_norm": 2.933085174237955, + "learning_rate": 1.1443736868996219e-06, + "loss": 0.5014, + "step": 11089 + }, + { + "epoch": 0.79, + "grad_norm": 1.614274346166808, + "learning_rate": 1.1436421331382015e-06, + "loss": 0.5139, + "step": 11090 + }, + { + "epoch": 0.79, + "grad_norm": 2.0392198378519173, + "learning_rate": 1.1429107830826714e-06, + "loss": 0.5965, + "step": 11091 + }, + { + "epoch": 0.79, + "grad_norm": 1.6412227802051131, + "learning_rate": 1.142179636771662e-06, + "loss": 0.5207, + "step": 11092 + }, + { + "epoch": 0.79, + "grad_norm": 0.666422957985759, + "learning_rate": 1.1414486942437969e-06, + "loss": 0.3986, + "step": 11093 + }, + { + "epoch": 0.79, + "grad_norm": 1.690088482907427, + "learning_rate": 1.1407179555376857e-06, + "loss": 0.5033, + "step": 11094 + }, + { + "epoch": 0.79, + "grad_norm": 1.8692701515284387, + "learning_rate": 1.1399874206919293e-06, + "loss": 0.5058, + "step": 11095 + }, + { + "epoch": 0.79, + "grad_norm": 1.6785784592570467, + "learning_rate": 1.1392570897451183e-06, + "loss": 0.5233, + "step": 11096 + }, + { + "epoch": 0.79, + "grad_norm": 1.694438462699581, + "learning_rate": 1.138526962735827e-06, + "loss": 0.5351, + "step": 11097 + }, + { + "epoch": 0.79, + "grad_norm": 1.7479075398014026, + "learning_rate": 1.137797039702629e-06, + "loss": 0.4985, + "step": 11098 + }, + { + "epoch": 0.79, + "grad_norm": 1.6139429629316708, + "learning_rate": 1.1370673206840766e-06, + "loss": 0.5422, + "step": 11099 + }, + { + "epoch": 0.79, + "grad_norm": 1.9534962277113515, + "learning_rate": 1.1363378057187192e-06, + "loss": 0.5443, + "step": 11100 + }, + { + "epoch": 0.79, + "grad_norm": 1.4813818950120183, + "learning_rate": 1.135608494845088e-06, + "loss": 0.5215, + "step": 11101 + }, + { + "epoch": 0.79, + "grad_norm": 0.7134652909176317, + "learning_rate": 1.1348793881017133e-06, + "loss": 0.4475, + "step": 11102 + }, + { + "epoch": 0.79, + "grad_norm": 1.5668747641577059, + "learning_rate": 1.1341504855271045e-06, + "loss": 0.4797, + "step": 11103 + }, + { + "epoch": 0.79, + "grad_norm": 1.6709158855891055, + "learning_rate": 1.1334217871597663e-06, + "loss": 0.5399, + "step": 11104 + }, + { + "epoch": 0.79, + "grad_norm": 1.6339004610879886, + "learning_rate": 1.1326932930381918e-06, + "loss": 0.4981, + "step": 11105 + }, + { + "epoch": 0.79, + "grad_norm": 1.7922285646965694, + "learning_rate": 1.131965003200861e-06, + "loss": 0.5597, + "step": 11106 + }, + { + "epoch": 0.79, + "grad_norm": 0.7017141293375915, + "learning_rate": 1.131236917686247e-06, + "loss": 0.3977, + "step": 11107 + }, + { + "epoch": 0.79, + "grad_norm": 1.72860437953875, + "learning_rate": 1.1305090365328053e-06, + "loss": 0.5778, + "step": 11108 + }, + { + "epoch": 0.79, + "grad_norm": 1.6632253233964331, + "learning_rate": 1.1297813597789908e-06, + "loss": 0.4844, + "step": 11109 + }, + { + "epoch": 0.79, + "grad_norm": 1.7154549907553032, + "learning_rate": 1.129053887463238e-06, + "loss": 0.4994, + "step": 11110 + }, + { + "epoch": 0.79, + "grad_norm": 4.46975914053688, + "learning_rate": 1.128326619623975e-06, + "loss": 0.4811, + "step": 11111 + }, + { + "epoch": 0.79, + "grad_norm": 4.7999360228035135, + "learning_rate": 1.127599556299619e-06, + "loss": 0.4672, + "step": 11112 + }, + { + "epoch": 0.79, + "grad_norm": 2.555470185326032, + "learning_rate": 1.126872697528576e-06, + "loss": 0.5114, + "step": 11113 + }, + { + "epoch": 0.79, + "grad_norm": 1.858505766639672, + "learning_rate": 1.1261460433492422e-06, + "loss": 0.5446, + "step": 11114 + }, + { + "epoch": 0.79, + "grad_norm": 0.7864236081883107, + "learning_rate": 1.1254195937999996e-06, + "loss": 0.4373, + "step": 11115 + }, + { + "epoch": 0.79, + "grad_norm": 1.54696010889377, + "learning_rate": 1.124693348919223e-06, + "loss": 0.5077, + "step": 11116 + }, + { + "epoch": 0.79, + "grad_norm": 0.7089584157336273, + "learning_rate": 1.1239673087452752e-06, + "loss": 0.4444, + "step": 11117 + }, + { + "epoch": 0.79, + "grad_norm": 1.5338215593657034, + "learning_rate": 1.1232414733165075e-06, + "loss": 0.4368, + "step": 11118 + }, + { + "epoch": 0.79, + "grad_norm": 1.858944271086086, + "learning_rate": 1.122515842671263e-06, + "loss": 0.5414, + "step": 11119 + }, + { + "epoch": 0.79, + "grad_norm": 1.637832291844311, + "learning_rate": 1.1217904168478677e-06, + "loss": 0.4885, + "step": 11120 + }, + { + "epoch": 0.79, + "grad_norm": 1.9301701873818475, + "learning_rate": 1.1210651958846463e-06, + "loss": 0.5327, + "step": 11121 + }, + { + "epoch": 0.79, + "grad_norm": 0.8157902687369112, + "learning_rate": 1.1203401798199038e-06, + "loss": 0.4227, + "step": 11122 + }, + { + "epoch": 0.79, + "grad_norm": 1.8289121508508992, + "learning_rate": 1.1196153686919386e-06, + "loss": 0.5462, + "step": 11123 + }, + { + "epoch": 0.79, + "grad_norm": 1.5691796235958002, + "learning_rate": 1.1188907625390388e-06, + "loss": 0.4511, + "step": 11124 + }, + { + "epoch": 0.79, + "grad_norm": 1.7414822917977066, + "learning_rate": 1.1181663613994798e-06, + "loss": 0.6033, + "step": 11125 + }, + { + "epoch": 0.79, + "grad_norm": 0.6427160612525361, + "learning_rate": 1.117442165311528e-06, + "loss": 0.3947, + "step": 11126 + }, + { + "epoch": 0.79, + "grad_norm": 0.7096478750145321, + "learning_rate": 1.1167181743134347e-06, + "loss": 0.4336, + "step": 11127 + }, + { + "epoch": 0.79, + "grad_norm": 1.8109523665306329, + "learning_rate": 1.1159943884434482e-06, + "loss": 0.5539, + "step": 11128 + }, + { + "epoch": 0.79, + "grad_norm": 2.034902072683474, + "learning_rate": 1.1152708077397973e-06, + "loss": 0.5384, + "step": 11129 + }, + { + "epoch": 0.79, + "grad_norm": 1.8784719444462108, + "learning_rate": 1.1145474322407075e-06, + "loss": 0.5559, + "step": 11130 + }, + { + "epoch": 0.79, + "grad_norm": 1.622403309252361, + "learning_rate": 1.1138242619843847e-06, + "loss": 0.5457, + "step": 11131 + }, + { + "epoch": 0.79, + "grad_norm": 1.6999004716709418, + "learning_rate": 1.113101297009036e-06, + "loss": 0.4983, + "step": 11132 + }, + { + "epoch": 0.79, + "grad_norm": 2.1081469057694524, + "learning_rate": 1.1123785373528457e-06, + "loss": 0.5144, + "step": 11133 + }, + { + "epoch": 0.79, + "grad_norm": 1.6511637177414893, + "learning_rate": 1.111655983053994e-06, + "loss": 0.5262, + "step": 11134 + }, + { + "epoch": 0.79, + "grad_norm": 1.5776658079743169, + "learning_rate": 1.110933634150649e-06, + "loss": 0.4773, + "step": 11135 + }, + { + "epoch": 0.79, + "grad_norm": 1.930519945106859, + "learning_rate": 1.110211490680967e-06, + "loss": 0.5384, + "step": 11136 + }, + { + "epoch": 0.79, + "grad_norm": 1.7659565725303588, + "learning_rate": 1.1094895526830962e-06, + "loss": 0.545, + "step": 11137 + }, + { + "epoch": 0.79, + "grad_norm": 1.770996257828368, + "learning_rate": 1.1087678201951674e-06, + "loss": 0.4837, + "step": 11138 + }, + { + "epoch": 0.79, + "grad_norm": 2.6687343167217743, + "learning_rate": 1.1080462932553098e-06, + "loss": 0.569, + "step": 11139 + }, + { + "epoch": 0.79, + "grad_norm": 1.685860891824896, + "learning_rate": 1.1073249719016344e-06, + "loss": 0.4963, + "step": 11140 + }, + { + "epoch": 0.79, + "grad_norm": 2.2301315632440697, + "learning_rate": 1.106603856172243e-06, + "loss": 0.6189, + "step": 11141 + }, + { + "epoch": 0.79, + "grad_norm": 1.5866212007628233, + "learning_rate": 1.1058829461052295e-06, + "loss": 0.4692, + "step": 11142 + }, + { + "epoch": 0.79, + "grad_norm": 2.5825050382108365, + "learning_rate": 1.1051622417386732e-06, + "loss": 0.5291, + "step": 11143 + }, + { + "epoch": 0.79, + "grad_norm": 1.5796595338982982, + "learning_rate": 1.1044417431106463e-06, + "loss": 0.5407, + "step": 11144 + }, + { + "epoch": 0.79, + "grad_norm": 1.686755937557189, + "learning_rate": 1.1037214502592052e-06, + "loss": 0.5172, + "step": 11145 + }, + { + "epoch": 0.79, + "grad_norm": 0.6929221323398291, + "learning_rate": 1.1030013632223995e-06, + "loss": 0.4323, + "step": 11146 + }, + { + "epoch": 0.79, + "grad_norm": 1.8879598052186182, + "learning_rate": 1.1022814820382672e-06, + "loss": 0.4793, + "step": 11147 + }, + { + "epoch": 0.79, + "grad_norm": 1.8852614481421912, + "learning_rate": 1.1015618067448337e-06, + "loss": 0.4987, + "step": 11148 + }, + { + "epoch": 0.79, + "grad_norm": 1.651180518198633, + "learning_rate": 1.1008423373801163e-06, + "loss": 0.4793, + "step": 11149 + }, + { + "epoch": 0.79, + "grad_norm": 2.003576310707084, + "learning_rate": 1.1001230739821167e-06, + "loss": 0.5189, + "step": 11150 + }, + { + "epoch": 0.79, + "grad_norm": 1.7213631488257894, + "learning_rate": 1.0994040165888331e-06, + "loss": 0.481, + "step": 11151 + }, + { + "epoch": 0.79, + "grad_norm": 1.7779710528983186, + "learning_rate": 1.0986851652382452e-06, + "loss": 0.5921, + "step": 11152 + }, + { + "epoch": 0.79, + "grad_norm": 2.0432025431254726, + "learning_rate": 1.0979665199683265e-06, + "loss": 0.6009, + "step": 11153 + }, + { + "epoch": 0.79, + "grad_norm": 1.5565712984954172, + "learning_rate": 1.0972480808170382e-06, + "loss": 0.4917, + "step": 11154 + }, + { + "epoch": 0.79, + "grad_norm": 1.6240446883184885, + "learning_rate": 1.0965298478223303e-06, + "loss": 0.5798, + "step": 11155 + }, + { + "epoch": 0.79, + "grad_norm": 1.7404132740207419, + "learning_rate": 1.0958118210221441e-06, + "loss": 0.4449, + "step": 11156 + }, + { + "epoch": 0.79, + "grad_norm": 1.8027968115520983, + "learning_rate": 1.0950940004544041e-06, + "loss": 0.4873, + "step": 11157 + }, + { + "epoch": 0.79, + "grad_norm": 2.1639365633793495, + "learning_rate": 1.0943763861570333e-06, + "loss": 0.5365, + "step": 11158 + }, + { + "epoch": 0.79, + "grad_norm": 1.5422627125746204, + "learning_rate": 1.0936589781679347e-06, + "loss": 0.4431, + "step": 11159 + }, + { + "epoch": 0.79, + "grad_norm": 1.8785255004952675, + "learning_rate": 1.0929417765250061e-06, + "loss": 0.4852, + "step": 11160 + }, + { + "epoch": 0.79, + "grad_norm": 1.7836357943258823, + "learning_rate": 1.0922247812661296e-06, + "loss": 0.4949, + "step": 11161 + }, + { + "epoch": 0.79, + "grad_norm": 1.7236054805561156, + "learning_rate": 1.0915079924291827e-06, + "loss": 0.5224, + "step": 11162 + }, + { + "epoch": 0.79, + "grad_norm": 1.898786434369378, + "learning_rate": 1.0907914100520283e-06, + "loss": 0.5341, + "step": 11163 + }, + { + "epoch": 0.79, + "grad_norm": 2.089475040774961, + "learning_rate": 1.090075034172517e-06, + "loss": 0.5897, + "step": 11164 + }, + { + "epoch": 0.79, + "grad_norm": 2.1059460987511107, + "learning_rate": 1.089358864828491e-06, + "loss": 0.4773, + "step": 11165 + }, + { + "epoch": 0.79, + "grad_norm": 1.53142149457687, + "learning_rate": 1.088642902057781e-06, + "loss": 0.5275, + "step": 11166 + }, + { + "epoch": 0.79, + "grad_norm": 0.7895355250373021, + "learning_rate": 1.0879271458982072e-06, + "loss": 0.4343, + "step": 11167 + }, + { + "epoch": 0.79, + "grad_norm": 1.5713675236029516, + "learning_rate": 1.0872115963875767e-06, + "loss": 0.4532, + "step": 11168 + }, + { + "epoch": 0.79, + "grad_norm": 1.5292880607179657, + "learning_rate": 1.0864962535636875e-06, + "loss": 0.4822, + "step": 11169 + }, + { + "epoch": 0.79, + "grad_norm": 1.63805691128887, + "learning_rate": 1.0857811174643274e-06, + "loss": 0.4554, + "step": 11170 + }, + { + "epoch": 0.79, + "grad_norm": 1.6793385899930928, + "learning_rate": 1.0850661881272717e-06, + "loss": 0.4505, + "step": 11171 + }, + { + "epoch": 0.79, + "grad_norm": 1.7980603408807818, + "learning_rate": 1.0843514655902854e-06, + "loss": 0.4966, + "step": 11172 + }, + { + "epoch": 0.79, + "grad_norm": 1.8506529302035666, + "learning_rate": 1.0836369498911225e-06, + "loss": 0.5019, + "step": 11173 + }, + { + "epoch": 0.79, + "grad_norm": 1.9037669051111683, + "learning_rate": 1.0829226410675281e-06, + "loss": 0.5774, + "step": 11174 + }, + { + "epoch": 0.79, + "grad_norm": 1.6595953089528037, + "learning_rate": 1.0822085391572313e-06, + "loss": 0.4871, + "step": 11175 + }, + { + "epoch": 0.79, + "grad_norm": 0.6284332914057973, + "learning_rate": 1.0814946441979546e-06, + "loss": 0.4346, + "step": 11176 + }, + { + "epoch": 0.79, + "grad_norm": 1.6835647852809437, + "learning_rate": 1.0807809562274091e-06, + "loss": 0.4895, + "step": 11177 + }, + { + "epoch": 0.79, + "grad_norm": 0.7538026581725579, + "learning_rate": 1.0800674752832928e-06, + "loss": 0.4179, + "step": 11178 + }, + { + "epoch": 0.79, + "grad_norm": 1.5169441400085024, + "learning_rate": 1.0793542014032965e-06, + "loss": 0.4859, + "step": 11179 + }, + { + "epoch": 0.79, + "grad_norm": 1.657919998980935, + "learning_rate": 1.078641134625094e-06, + "loss": 0.4786, + "step": 11180 + }, + { + "epoch": 0.79, + "grad_norm": 1.526419642531314, + "learning_rate": 1.0779282749863563e-06, + "loss": 0.4743, + "step": 11181 + }, + { + "epoch": 0.79, + "grad_norm": 0.6704425381233902, + "learning_rate": 1.077215622524736e-06, + "loss": 0.4093, + "step": 11182 + }, + { + "epoch": 0.79, + "grad_norm": 1.7837322254114216, + "learning_rate": 1.0765031772778784e-06, + "loss": 0.4889, + "step": 11183 + }, + { + "epoch": 0.79, + "grad_norm": 1.4711503117869231, + "learning_rate": 1.0757909392834176e-06, + "loss": 0.4897, + "step": 11184 + }, + { + "epoch": 0.79, + "grad_norm": 0.6452190795256263, + "learning_rate": 1.0750789085789771e-06, + "loss": 0.4117, + "step": 11185 + }, + { + "epoch": 0.79, + "grad_norm": 1.6978878996497897, + "learning_rate": 1.0743670852021687e-06, + "loss": 0.5163, + "step": 11186 + }, + { + "epoch": 0.79, + "grad_norm": 1.8243618986529708, + "learning_rate": 1.0736554691905897e-06, + "loss": 0.52, + "step": 11187 + }, + { + "epoch": 0.79, + "grad_norm": 1.778538328936432, + "learning_rate": 1.072944060581837e-06, + "loss": 0.4929, + "step": 11188 + }, + { + "epoch": 0.79, + "grad_norm": 1.8396999355508834, + "learning_rate": 1.0722328594134833e-06, + "loss": 0.4882, + "step": 11189 + }, + { + "epoch": 0.79, + "grad_norm": 1.505306199692012, + "learning_rate": 1.0715218657230998e-06, + "loss": 0.5134, + "step": 11190 + }, + { + "epoch": 0.79, + "grad_norm": 1.6111833344627358, + "learning_rate": 1.0708110795482423e-06, + "loss": 0.4444, + "step": 11191 + }, + { + "epoch": 0.79, + "grad_norm": 2.150753717465292, + "learning_rate": 1.0701005009264576e-06, + "loss": 0.5289, + "step": 11192 + }, + { + "epoch": 0.79, + "grad_norm": 1.701918741162257, + "learning_rate": 1.0693901298952818e-06, + "loss": 0.5305, + "step": 11193 + }, + { + "epoch": 0.79, + "grad_norm": 1.6043926277244096, + "learning_rate": 1.0686799664922375e-06, + "loss": 0.5008, + "step": 11194 + }, + { + "epoch": 0.79, + "grad_norm": 1.9687120105738858, + "learning_rate": 1.067970010754838e-06, + "loss": 0.5187, + "step": 11195 + }, + { + "epoch": 0.79, + "grad_norm": 1.6980819377996579, + "learning_rate": 1.0672602627205864e-06, + "loss": 0.5412, + "step": 11196 + }, + { + "epoch": 0.79, + "grad_norm": 1.8345757018648285, + "learning_rate": 1.0665507224269745e-06, + "loss": 0.4761, + "step": 11197 + }, + { + "epoch": 0.79, + "grad_norm": 2.2230268807799183, + "learning_rate": 1.0658413899114806e-06, + "loss": 0.5454, + "step": 11198 + }, + { + "epoch": 0.79, + "grad_norm": 1.471367017086696, + "learning_rate": 1.0651322652115742e-06, + "loss": 0.4927, + "step": 11199 + }, + { + "epoch": 0.79, + "grad_norm": 1.6936977406183227, + "learning_rate": 1.064423348364717e-06, + "loss": 0.4543, + "step": 11200 + }, + { + "epoch": 0.79, + "grad_norm": 1.6959324852011637, + "learning_rate": 1.063714639408353e-06, + "loss": 0.5691, + "step": 11201 + }, + { + "epoch": 0.79, + "grad_norm": 1.9975095222568415, + "learning_rate": 1.0630061383799195e-06, + "loss": 0.5076, + "step": 11202 + }, + { + "epoch": 0.79, + "grad_norm": 1.4476778242794197, + "learning_rate": 1.0622978453168425e-06, + "loss": 0.4835, + "step": 11203 + }, + { + "epoch": 0.8, + "grad_norm": 1.7950793374307685, + "learning_rate": 1.0615897602565373e-06, + "loss": 0.4854, + "step": 11204 + }, + { + "epoch": 0.8, + "grad_norm": 1.9230525268436058, + "learning_rate": 1.0608818832364043e-06, + "loss": 0.5456, + "step": 11205 + }, + { + "epoch": 0.8, + "grad_norm": 3.16668070932607, + "learning_rate": 1.0601742142938381e-06, + "loss": 0.5425, + "step": 11206 + }, + { + "epoch": 0.8, + "grad_norm": 1.8876980579309854, + "learning_rate": 1.05946675346622e-06, + "loss": 0.5461, + "step": 11207 + }, + { + "epoch": 0.8, + "grad_norm": 1.9063616649930932, + "learning_rate": 1.0587595007909202e-06, + "loss": 0.5094, + "step": 11208 + }, + { + "epoch": 0.8, + "grad_norm": 1.538386309880416, + "learning_rate": 1.0580524563052997e-06, + "loss": 0.4627, + "step": 11209 + }, + { + "epoch": 0.8, + "grad_norm": 1.8340928317081577, + "learning_rate": 1.057345620046703e-06, + "loss": 0.5918, + "step": 11210 + }, + { + "epoch": 0.8, + "grad_norm": 1.7316772843205683, + "learning_rate": 1.0566389920524728e-06, + "loss": 0.5084, + "step": 11211 + }, + { + "epoch": 0.8, + "grad_norm": 2.725939466735819, + "learning_rate": 1.055932572359931e-06, + "loss": 0.5355, + "step": 11212 + }, + { + "epoch": 0.8, + "grad_norm": 1.555657920225572, + "learning_rate": 1.0552263610063963e-06, + "loss": 0.4998, + "step": 11213 + }, + { + "epoch": 0.8, + "grad_norm": 1.691753626229931, + "learning_rate": 1.0545203580291707e-06, + "loss": 0.522, + "step": 11214 + }, + { + "epoch": 0.8, + "grad_norm": 1.9533704718835754, + "learning_rate": 1.05381456346555e-06, + "loss": 0.5343, + "step": 11215 + }, + { + "epoch": 0.8, + "grad_norm": 0.794312838566112, + "learning_rate": 1.0531089773528163e-06, + "loss": 0.4295, + "step": 11216 + }, + { + "epoch": 0.8, + "grad_norm": 1.6352570806213012, + "learning_rate": 1.0524035997282377e-06, + "loss": 0.5502, + "step": 11217 + }, + { + "epoch": 0.8, + "grad_norm": 2.1227235809166394, + "learning_rate": 1.0516984306290796e-06, + "loss": 0.481, + "step": 11218 + }, + { + "epoch": 0.8, + "grad_norm": 1.5844463187904028, + "learning_rate": 1.0509934700925883e-06, + "loss": 0.4922, + "step": 11219 + }, + { + "epoch": 0.8, + "grad_norm": 0.6832139725834395, + "learning_rate": 1.0502887181560028e-06, + "loss": 0.4489, + "step": 11220 + }, + { + "epoch": 0.8, + "grad_norm": 1.6741842556122202, + "learning_rate": 1.0495841748565505e-06, + "loss": 0.52, + "step": 11221 + }, + { + "epoch": 0.8, + "grad_norm": 0.7018547386492621, + "learning_rate": 1.0488798402314477e-06, + "loss": 0.421, + "step": 11222 + }, + { + "epoch": 0.8, + "grad_norm": 1.560095275313976, + "learning_rate": 1.0481757143179016e-06, + "loss": 0.5381, + "step": 11223 + }, + { + "epoch": 0.8, + "grad_norm": 1.6096803022182287, + "learning_rate": 1.0474717971531035e-06, + "loss": 0.4593, + "step": 11224 + }, + { + "epoch": 0.8, + "grad_norm": 1.6016734969988498, + "learning_rate": 1.0467680887742382e-06, + "loss": 0.5217, + "step": 11225 + }, + { + "epoch": 0.8, + "grad_norm": 6.358567249553165, + "learning_rate": 1.046064589218478e-06, + "loss": 0.5472, + "step": 11226 + }, + { + "epoch": 0.8, + "grad_norm": 1.5832135878785962, + "learning_rate": 1.0453612985229833e-06, + "loss": 0.4746, + "step": 11227 + }, + { + "epoch": 0.8, + "grad_norm": 1.7769036252275616, + "learning_rate": 1.044658216724907e-06, + "loss": 0.5785, + "step": 11228 + }, + { + "epoch": 0.8, + "grad_norm": 1.9118738864322207, + "learning_rate": 1.0439553438613831e-06, + "loss": 0.5794, + "step": 11229 + }, + { + "epoch": 0.8, + "grad_norm": 1.744364280319165, + "learning_rate": 1.0432526799695459e-06, + "loss": 0.4855, + "step": 11230 + }, + { + "epoch": 0.8, + "grad_norm": 0.6757044129565623, + "learning_rate": 1.0425502250865076e-06, + "loss": 0.4146, + "step": 11231 + }, + { + "epoch": 0.8, + "grad_norm": 1.859169124561317, + "learning_rate": 1.0418479792493775e-06, + "loss": 0.4806, + "step": 11232 + }, + { + "epoch": 0.8, + "grad_norm": 1.9385427776601147, + "learning_rate": 1.041145942495247e-06, + "loss": 0.5307, + "step": 11233 + }, + { + "epoch": 0.8, + "grad_norm": 1.9568555702806791, + "learning_rate": 1.0404441148612044e-06, + "loss": 0.4871, + "step": 11234 + }, + { + "epoch": 0.8, + "grad_norm": 1.6910389124449499, + "learning_rate": 1.0397424963843194e-06, + "loss": 0.552, + "step": 11235 + }, + { + "epoch": 0.8, + "grad_norm": 1.5800851664359956, + "learning_rate": 1.039041087101655e-06, + "loss": 0.5311, + "step": 11236 + }, + { + "epoch": 0.8, + "grad_norm": 1.6571277078178699, + "learning_rate": 1.0383398870502615e-06, + "loss": 0.5369, + "step": 11237 + }, + { + "epoch": 0.8, + "grad_norm": 1.56530741732832, + "learning_rate": 1.0376388962671797e-06, + "loss": 0.5082, + "step": 11238 + }, + { + "epoch": 0.8, + "grad_norm": 1.419038265671975, + "learning_rate": 1.0369381147894387e-06, + "loss": 0.4641, + "step": 11239 + }, + { + "epoch": 0.8, + "grad_norm": 1.958854165287303, + "learning_rate": 1.036237542654052e-06, + "loss": 0.4854, + "step": 11240 + }, + { + "epoch": 0.8, + "grad_norm": 1.974030312244267, + "learning_rate": 1.035537179898033e-06, + "loss": 0.5412, + "step": 11241 + }, + { + "epoch": 0.8, + "grad_norm": 1.3635790482005044, + "learning_rate": 1.0348370265583718e-06, + "loss": 0.4591, + "step": 11242 + }, + { + "epoch": 0.8, + "grad_norm": 0.74069473441299, + "learning_rate": 1.0341370826720542e-06, + "loss": 0.414, + "step": 11243 + }, + { + "epoch": 0.8, + "grad_norm": 0.7017157193597974, + "learning_rate": 1.0334373482760545e-06, + "loss": 0.4277, + "step": 11244 + }, + { + "epoch": 0.8, + "grad_norm": 1.3531103955907227, + "learning_rate": 1.032737823407335e-06, + "loss": 0.4472, + "step": 11245 + }, + { + "epoch": 0.8, + "grad_norm": 2.047917649106481, + "learning_rate": 1.0320385081028478e-06, + "loss": 0.5373, + "step": 11246 + }, + { + "epoch": 0.8, + "grad_norm": 1.9841614516203951, + "learning_rate": 1.03133940239953e-06, + "loss": 0.5538, + "step": 11247 + }, + { + "epoch": 0.8, + "grad_norm": 0.7271225706796668, + "learning_rate": 1.0306405063343128e-06, + "loss": 0.4412, + "step": 11248 + }, + { + "epoch": 0.8, + "grad_norm": 2.0359063886262168, + "learning_rate": 1.0299418199441146e-06, + "loss": 0.5393, + "step": 11249 + }, + { + "epoch": 0.8, + "grad_norm": 1.8229399706705152, + "learning_rate": 1.0292433432658415e-06, + "loss": 0.5174, + "step": 11250 + }, + { + "epoch": 0.8, + "grad_norm": 1.6247049215096885, + "learning_rate": 1.0285450763363896e-06, + "loss": 0.5333, + "step": 11251 + }, + { + "epoch": 0.8, + "grad_norm": 1.74968438512329, + "learning_rate": 1.0278470191926442e-06, + "loss": 0.4747, + "step": 11252 + }, + { + "epoch": 0.8, + "grad_norm": 1.6868810242861803, + "learning_rate": 1.0271491718714798e-06, + "loss": 0.5454, + "step": 11253 + }, + { + "epoch": 0.8, + "grad_norm": 1.6430458720890107, + "learning_rate": 1.026451534409757e-06, + "loss": 0.4874, + "step": 11254 + }, + { + "epoch": 0.8, + "grad_norm": 1.8258672822223552, + "learning_rate": 1.0257541068443277e-06, + "loss": 0.5261, + "step": 11255 + }, + { + "epoch": 0.8, + "grad_norm": 1.643039303350932, + "learning_rate": 1.0250568892120339e-06, + "loss": 0.5017, + "step": 11256 + }, + { + "epoch": 0.8, + "grad_norm": 1.602833255793951, + "learning_rate": 1.0243598815497036e-06, + "loss": 0.548, + "step": 11257 + }, + { + "epoch": 0.8, + "grad_norm": 1.5959711964924643, + "learning_rate": 1.0236630838941575e-06, + "loss": 0.4978, + "step": 11258 + }, + { + "epoch": 0.8, + "grad_norm": 1.6276797577956414, + "learning_rate": 1.0229664962821977e-06, + "loss": 0.4541, + "step": 11259 + }, + { + "epoch": 0.8, + "grad_norm": 1.662179601231129, + "learning_rate": 1.0222701187506268e-06, + "loss": 0.4803, + "step": 11260 + }, + { + "epoch": 0.8, + "grad_norm": 1.6539954849931506, + "learning_rate": 1.021573951336225e-06, + "loss": 0.4778, + "step": 11261 + }, + { + "epoch": 0.8, + "grad_norm": 1.4391245952132268, + "learning_rate": 1.020877994075769e-06, + "loss": 0.4554, + "step": 11262 + }, + { + "epoch": 0.8, + "grad_norm": 1.4764173203615507, + "learning_rate": 1.020182247006018e-06, + "loss": 0.4864, + "step": 11263 + }, + { + "epoch": 0.8, + "grad_norm": 1.5895719211089743, + "learning_rate": 1.019486710163729e-06, + "loss": 0.5934, + "step": 11264 + }, + { + "epoch": 0.8, + "grad_norm": 1.7690808229285628, + "learning_rate": 1.0187913835856384e-06, + "loss": 0.4776, + "step": 11265 + }, + { + "epoch": 0.8, + "grad_norm": 1.8741920771391927, + "learning_rate": 1.0180962673084754e-06, + "loss": 0.5576, + "step": 11266 + }, + { + "epoch": 0.8, + "grad_norm": 2.027334830807492, + "learning_rate": 1.0174013613689633e-06, + "loss": 0.5765, + "step": 11267 + }, + { + "epoch": 0.8, + "grad_norm": 2.1233094933883145, + "learning_rate": 1.0167066658038045e-06, + "loss": 0.5678, + "step": 11268 + }, + { + "epoch": 0.8, + "grad_norm": 1.6240466848459032, + "learning_rate": 1.016012180649698e-06, + "loss": 0.5786, + "step": 11269 + }, + { + "epoch": 0.8, + "grad_norm": 1.7662460861256604, + "learning_rate": 1.0153179059433254e-06, + "loss": 0.5129, + "step": 11270 + }, + { + "epoch": 0.8, + "grad_norm": 1.8739949639578033, + "learning_rate": 1.0146238417213654e-06, + "loss": 0.5851, + "step": 11271 + }, + { + "epoch": 0.8, + "grad_norm": 1.6660511290325462, + "learning_rate": 1.0139299880204773e-06, + "loss": 0.5519, + "step": 11272 + }, + { + "epoch": 0.8, + "grad_norm": 2.0197271678444064, + "learning_rate": 1.0132363448773135e-06, + "loss": 0.5066, + "step": 11273 + }, + { + "epoch": 0.8, + "grad_norm": 4.372904521366764, + "learning_rate": 1.0125429123285152e-06, + "loss": 0.5562, + "step": 11274 + }, + { + "epoch": 0.8, + "grad_norm": 1.7676138329453863, + "learning_rate": 1.0118496904107117e-06, + "loss": 0.5189, + "step": 11275 + }, + { + "epoch": 0.8, + "grad_norm": 2.742781051032969, + "learning_rate": 1.0111566791605227e-06, + "loss": 0.4771, + "step": 11276 + }, + { + "epoch": 0.8, + "grad_norm": 1.6866914910595707, + "learning_rate": 1.0104638786145526e-06, + "loss": 0.4683, + "step": 11277 + }, + { + "epoch": 0.8, + "grad_norm": 1.5221461906571683, + "learning_rate": 1.0097712888093985e-06, + "loss": 0.5388, + "step": 11278 + }, + { + "epoch": 0.8, + "grad_norm": 1.7501840965823392, + "learning_rate": 1.0090789097816456e-06, + "loss": 0.4673, + "step": 11279 + }, + { + "epoch": 0.8, + "grad_norm": 1.5845013277869782, + "learning_rate": 1.0083867415678683e-06, + "loss": 0.48, + "step": 11280 + }, + { + "epoch": 0.8, + "grad_norm": 1.8373876279578896, + "learning_rate": 1.0076947842046296e-06, + "loss": 0.4181, + "step": 11281 + }, + { + "epoch": 0.8, + "grad_norm": 2.030640035398701, + "learning_rate": 1.007003037728478e-06, + "loss": 0.4926, + "step": 11282 + }, + { + "epoch": 0.8, + "grad_norm": 1.5456396040907112, + "learning_rate": 1.0063115021759584e-06, + "loss": 0.5245, + "step": 11283 + }, + { + "epoch": 0.8, + "grad_norm": 1.5809970510631979, + "learning_rate": 1.0056201775835962e-06, + "loss": 0.5285, + "step": 11284 + }, + { + "epoch": 0.8, + "grad_norm": 1.7012260696415753, + "learning_rate": 1.004929063987911e-06, + "loss": 0.524, + "step": 11285 + }, + { + "epoch": 0.8, + "grad_norm": 2.01427925484465, + "learning_rate": 1.0042381614254103e-06, + "loss": 0.5569, + "step": 11286 + }, + { + "epoch": 0.8, + "grad_norm": 1.7973437892148814, + "learning_rate": 1.0035474699325891e-06, + "loss": 0.5205, + "step": 11287 + }, + { + "epoch": 0.8, + "grad_norm": 1.8344983678319808, + "learning_rate": 1.0028569895459334e-06, + "loss": 0.4621, + "step": 11288 + }, + { + "epoch": 0.8, + "grad_norm": 1.8884431348681736, + "learning_rate": 1.0021667203019136e-06, + "loss": 0.5545, + "step": 11289 + }, + { + "epoch": 0.8, + "grad_norm": 1.997099536414051, + "learning_rate": 1.0014766622369965e-06, + "loss": 0.5648, + "step": 11290 + }, + { + "epoch": 0.8, + "grad_norm": 1.7455785530881238, + "learning_rate": 1.00078681538763e-06, + "loss": 0.4992, + "step": 11291 + }, + { + "epoch": 0.8, + "grad_norm": 1.925086172712342, + "learning_rate": 1.000097179790256e-06, + "loss": 0.5748, + "step": 11292 + }, + { + "epoch": 0.8, + "grad_norm": 1.7296727085013825, + "learning_rate": 9.994077554812998e-07, + "loss": 0.4993, + "step": 11293 + }, + { + "epoch": 0.8, + "grad_norm": 1.747736662318826, + "learning_rate": 9.98718542497183e-07, + "loss": 0.5242, + "step": 11294 + }, + { + "epoch": 0.8, + "grad_norm": 1.5412689579217458, + "learning_rate": 9.980295408743123e-07, + "loss": 0.4753, + "step": 11295 + }, + { + "epoch": 0.8, + "grad_norm": 3.202185540638962, + "learning_rate": 9.973407506490811e-07, + "loss": 0.5154, + "step": 11296 + }, + { + "epoch": 0.8, + "grad_norm": 1.7907739538212493, + "learning_rate": 9.966521718578737e-07, + "loss": 0.4838, + "step": 11297 + }, + { + "epoch": 0.8, + "grad_norm": 1.6349700752509877, + "learning_rate": 9.95963804537064e-07, + "loss": 0.5035, + "step": 11298 + }, + { + "epoch": 0.8, + "grad_norm": 1.7189847878783926, + "learning_rate": 9.952756487230152e-07, + "loss": 0.5169, + "step": 11299 + }, + { + "epoch": 0.8, + "grad_norm": 0.6993075710889468, + "learning_rate": 9.945877044520736e-07, + "loss": 0.4093, + "step": 11300 + }, + { + "epoch": 0.8, + "grad_norm": 1.8269426114870027, + "learning_rate": 9.93899971760584e-07, + "loss": 0.5438, + "step": 11301 + }, + { + "epoch": 0.8, + "grad_norm": 2.209143675968071, + "learning_rate": 9.932124506848718e-07, + "loss": 0.4694, + "step": 11302 + }, + { + "epoch": 0.8, + "grad_norm": 0.737565124124016, + "learning_rate": 9.92525141261254e-07, + "loss": 0.406, + "step": 11303 + }, + { + "epoch": 0.8, + "grad_norm": 1.897537849224304, + "learning_rate": 9.918380435260384e-07, + "loss": 0.4946, + "step": 11304 + }, + { + "epoch": 0.8, + "grad_norm": 1.8188239591604525, + "learning_rate": 9.911511575155181e-07, + "loss": 0.5989, + "step": 11305 + }, + { + "epoch": 0.8, + "grad_norm": 0.7624925439489431, + "learning_rate": 9.904644832659788e-07, + "loss": 0.4063, + "step": 11306 + }, + { + "epoch": 0.8, + "grad_norm": 1.9721404466234673, + "learning_rate": 9.897780208136903e-07, + "loss": 0.5354, + "step": 11307 + }, + { + "epoch": 0.8, + "grad_norm": 1.7555857344038668, + "learning_rate": 9.890917701949154e-07, + "loss": 0.5253, + "step": 11308 + }, + { + "epoch": 0.8, + "grad_norm": 1.7994873118273602, + "learning_rate": 9.884057314459045e-07, + "loss": 0.492, + "step": 11309 + }, + { + "epoch": 0.8, + "grad_norm": 2.83234422689632, + "learning_rate": 9.87719904602895e-07, + "loss": 0.5526, + "step": 11310 + }, + { + "epoch": 0.8, + "grad_norm": 0.7017765208363271, + "learning_rate": 9.87034289702118e-07, + "loss": 0.4316, + "step": 11311 + }, + { + "epoch": 0.8, + "grad_norm": 1.9780567669422324, + "learning_rate": 9.863488867797843e-07, + "loss": 0.5005, + "step": 11312 + }, + { + "epoch": 0.8, + "grad_norm": 1.8921110164547692, + "learning_rate": 9.856636958721056e-07, + "loss": 0.482, + "step": 11313 + }, + { + "epoch": 0.8, + "grad_norm": 1.6612827718152423, + "learning_rate": 9.849787170152708e-07, + "loss": 0.4947, + "step": 11314 + }, + { + "epoch": 0.8, + "grad_norm": 1.6305622843380931, + "learning_rate": 9.842939502454656e-07, + "loss": 0.5562, + "step": 11315 + }, + { + "epoch": 0.8, + "grad_norm": 1.7890266840807443, + "learning_rate": 9.836093955988606e-07, + "loss": 0.5068, + "step": 11316 + }, + { + "epoch": 0.8, + "grad_norm": 0.7002631125001765, + "learning_rate": 9.82925053111617e-07, + "loss": 0.4337, + "step": 11317 + }, + { + "epoch": 0.8, + "grad_norm": 2.172150856521467, + "learning_rate": 9.822409228198854e-07, + "loss": 0.5918, + "step": 11318 + }, + { + "epoch": 0.8, + "grad_norm": 1.6736999570847173, + "learning_rate": 9.815570047597989e-07, + "loss": 0.55, + "step": 11319 + }, + { + "epoch": 0.8, + "grad_norm": 7.592727368431582, + "learning_rate": 9.808732989674907e-07, + "loss": 0.5709, + "step": 11320 + }, + { + "epoch": 0.8, + "grad_norm": 1.9580933607882423, + "learning_rate": 9.801898054790726e-07, + "loss": 0.5697, + "step": 11321 + }, + { + "epoch": 0.8, + "grad_norm": 1.621275298163921, + "learning_rate": 9.795065243306495e-07, + "loss": 0.5348, + "step": 11322 + }, + { + "epoch": 0.8, + "grad_norm": 1.5381814835425238, + "learning_rate": 9.788234555583153e-07, + "loss": 0.4978, + "step": 11323 + }, + { + "epoch": 0.8, + "grad_norm": 1.5912911217912171, + "learning_rate": 9.78140599198152e-07, + "loss": 0.479, + "step": 11324 + }, + { + "epoch": 0.8, + "grad_norm": 1.5246572283666426, + "learning_rate": 9.774579552862307e-07, + "loss": 0.4498, + "step": 11325 + }, + { + "epoch": 0.8, + "grad_norm": 0.6995513582959493, + "learning_rate": 9.767755238586097e-07, + "loss": 0.4089, + "step": 11326 + }, + { + "epoch": 0.8, + "grad_norm": 1.7483635073494488, + "learning_rate": 9.760933049513378e-07, + "loss": 0.5563, + "step": 11327 + }, + { + "epoch": 0.8, + "grad_norm": 1.775802423221744, + "learning_rate": 9.754112986004527e-07, + "loss": 0.5285, + "step": 11328 + }, + { + "epoch": 0.8, + "grad_norm": 0.7602698235954728, + "learning_rate": 9.747295048419813e-07, + "loss": 0.4414, + "step": 11329 + }, + { + "epoch": 0.8, + "grad_norm": 1.616324833828178, + "learning_rate": 9.740479237119337e-07, + "loss": 0.5141, + "step": 11330 + }, + { + "epoch": 0.8, + "grad_norm": 1.9453668205876433, + "learning_rate": 9.733665552463184e-07, + "loss": 0.5071, + "step": 11331 + }, + { + "epoch": 0.8, + "grad_norm": 1.9143893603852753, + "learning_rate": 9.726853994811269e-07, + "loss": 0.5855, + "step": 11332 + }, + { + "epoch": 0.8, + "grad_norm": 0.6031974543925593, + "learning_rate": 9.720044564523379e-07, + "loss": 0.4255, + "step": 11333 + }, + { + "epoch": 0.8, + "grad_norm": 1.7169815780706554, + "learning_rate": 9.713237261959223e-07, + "loss": 0.5109, + "step": 11334 + }, + { + "epoch": 0.8, + "grad_norm": 1.810057457122053, + "learning_rate": 9.706432087478385e-07, + "loss": 0.577, + "step": 11335 + }, + { + "epoch": 0.8, + "grad_norm": 1.8168647326606424, + "learning_rate": 9.699629041440345e-07, + "loss": 0.4996, + "step": 11336 + }, + { + "epoch": 0.8, + "grad_norm": 1.7455573947035439, + "learning_rate": 9.692828124204446e-07, + "loss": 0.524, + "step": 11337 + }, + { + "epoch": 0.8, + "grad_norm": 1.6733443172691176, + "learning_rate": 9.686029336129942e-07, + "loss": 0.5422, + "step": 11338 + }, + { + "epoch": 0.8, + "grad_norm": 1.7017681759225547, + "learning_rate": 9.679232677575978e-07, + "loss": 0.535, + "step": 11339 + }, + { + "epoch": 0.8, + "grad_norm": 1.547736157076401, + "learning_rate": 9.672438148901564e-07, + "loss": 0.5196, + "step": 11340 + }, + { + "epoch": 0.8, + "grad_norm": 1.4400199766700286, + "learning_rate": 9.66564575046563e-07, + "loss": 0.4838, + "step": 11341 + }, + { + "epoch": 0.8, + "grad_norm": 1.9306752176824982, + "learning_rate": 9.658855482626933e-07, + "loss": 0.5093, + "step": 11342 + }, + { + "epoch": 0.8, + "grad_norm": 3.5808182739425645, + "learning_rate": 9.652067345744204e-07, + "loss": 0.4372, + "step": 11343 + }, + { + "epoch": 0.8, + "grad_norm": 1.8101458378227766, + "learning_rate": 9.645281340175988e-07, + "loss": 0.4719, + "step": 11344 + }, + { + "epoch": 0.81, + "grad_norm": 2.065174261335442, + "learning_rate": 9.638497466280756e-07, + "loss": 0.5161, + "step": 11345 + }, + { + "epoch": 0.81, + "grad_norm": 1.7513698973138914, + "learning_rate": 9.631715724416846e-07, + "loss": 0.5699, + "step": 11346 + }, + { + "epoch": 0.81, + "grad_norm": 1.872464287888122, + "learning_rate": 9.624936114942496e-07, + "loss": 0.5042, + "step": 11347 + }, + { + "epoch": 0.81, + "grad_norm": 0.6966883748993468, + "learning_rate": 9.618158638215846e-07, + "loss": 0.4628, + "step": 11348 + }, + { + "epoch": 0.81, + "grad_norm": 1.8932863605899417, + "learning_rate": 9.611383294594862e-07, + "loss": 0.5753, + "step": 11349 + }, + { + "epoch": 0.81, + "grad_norm": 0.664643126447566, + "learning_rate": 9.604610084437493e-07, + "loss": 0.4352, + "step": 11350 + }, + { + "epoch": 0.81, + "grad_norm": 2.1056715216520465, + "learning_rate": 9.597839008101484e-07, + "loss": 0.4958, + "step": 11351 + }, + { + "epoch": 0.81, + "grad_norm": 2.20856386000501, + "learning_rate": 9.591070065944524e-07, + "loss": 0.4884, + "step": 11352 + }, + { + "epoch": 0.81, + "grad_norm": 1.5760012699770996, + "learning_rate": 9.584303258324163e-07, + "loss": 0.5716, + "step": 11353 + }, + { + "epoch": 0.81, + "grad_norm": 1.578295699784479, + "learning_rate": 9.57753858559785e-07, + "loss": 0.5213, + "step": 11354 + }, + { + "epoch": 0.81, + "grad_norm": 1.7353295777821152, + "learning_rate": 9.570776048122938e-07, + "loss": 0.4161, + "step": 11355 + }, + { + "epoch": 0.81, + "grad_norm": 1.562285898224663, + "learning_rate": 9.56401564625661e-07, + "loss": 0.5287, + "step": 11356 + }, + { + "epoch": 0.81, + "grad_norm": 2.1628738499128253, + "learning_rate": 9.557257380355995e-07, + "loss": 0.5046, + "step": 11357 + }, + { + "epoch": 0.81, + "grad_norm": 2.1189800258548654, + "learning_rate": 9.550501250778082e-07, + "loss": 0.5342, + "step": 11358 + }, + { + "epoch": 0.81, + "grad_norm": 1.7436801491295326, + "learning_rate": 9.543747257879755e-07, + "loss": 0.5165, + "step": 11359 + }, + { + "epoch": 0.81, + "grad_norm": 1.5189451747586926, + "learning_rate": 9.53699540201779e-07, + "loss": 0.5193, + "step": 11360 + }, + { + "epoch": 0.81, + "grad_norm": 1.8186777529850144, + "learning_rate": 9.530245683548817e-07, + "loss": 0.4892, + "step": 11361 + }, + { + "epoch": 0.81, + "grad_norm": 1.8772213200819938, + "learning_rate": 9.523498102829421e-07, + "loss": 0.5029, + "step": 11362 + }, + { + "epoch": 0.81, + "grad_norm": 1.6698644250995676, + "learning_rate": 9.516752660215994e-07, + "loss": 0.5269, + "step": 11363 + }, + { + "epoch": 0.81, + "grad_norm": 2.6980975903759923, + "learning_rate": 9.510009356064865e-07, + "loss": 0.4955, + "step": 11364 + }, + { + "epoch": 0.81, + "grad_norm": 1.7044455079214986, + "learning_rate": 9.503268190732245e-07, + "loss": 0.5094, + "step": 11365 + }, + { + "epoch": 0.81, + "grad_norm": 2.9954361961568905, + "learning_rate": 9.496529164574231e-07, + "loss": 0.4444, + "step": 11366 + }, + { + "epoch": 0.81, + "grad_norm": 1.7040010860621926, + "learning_rate": 9.48979227794678e-07, + "loss": 0.5502, + "step": 11367 + }, + { + "epoch": 0.81, + "grad_norm": 1.7736057140736934, + "learning_rate": 9.483057531205769e-07, + "loss": 0.5338, + "step": 11368 + }, + { + "epoch": 0.81, + "grad_norm": 1.5809655816442083, + "learning_rate": 9.476324924706948e-07, + "loss": 0.5268, + "step": 11369 + }, + { + "epoch": 0.81, + "grad_norm": 0.6978031736891618, + "learning_rate": 9.469594458805958e-07, + "loss": 0.448, + "step": 11370 + }, + { + "epoch": 0.81, + "grad_norm": 1.7972289124689291, + "learning_rate": 9.462866133858333e-07, + "loss": 0.5054, + "step": 11371 + }, + { + "epoch": 0.81, + "grad_norm": 2.147941262717982, + "learning_rate": 9.456139950219456e-07, + "loss": 0.5136, + "step": 11372 + }, + { + "epoch": 0.81, + "grad_norm": 2.1650067067819485, + "learning_rate": 9.449415908244675e-07, + "loss": 0.5471, + "step": 11373 + }, + { + "epoch": 0.81, + "grad_norm": 5.150840192628067, + "learning_rate": 9.442694008289133e-07, + "loss": 0.507, + "step": 11374 + }, + { + "epoch": 0.81, + "grad_norm": 1.6273699952619212, + "learning_rate": 9.435974250707924e-07, + "loss": 0.5032, + "step": 11375 + }, + { + "epoch": 0.81, + "grad_norm": 1.7169425884171914, + "learning_rate": 9.429256635856005e-07, + "loss": 0.4931, + "step": 11376 + }, + { + "epoch": 0.81, + "grad_norm": 1.6799521087795601, + "learning_rate": 9.422541164088222e-07, + "loss": 0.56, + "step": 11377 + }, + { + "epoch": 0.81, + "grad_norm": 1.9169903707200466, + "learning_rate": 9.415827835759322e-07, + "loss": 0.5349, + "step": 11378 + }, + { + "epoch": 0.81, + "grad_norm": 1.8556671747937616, + "learning_rate": 9.409116651223888e-07, + "loss": 0.3749, + "step": 11379 + }, + { + "epoch": 0.81, + "grad_norm": 0.6800788418201351, + "learning_rate": 9.402407610836479e-07, + "loss": 0.4004, + "step": 11380 + }, + { + "epoch": 0.81, + "grad_norm": 1.5604774961518988, + "learning_rate": 9.39570071495145e-07, + "loss": 0.5555, + "step": 11381 + }, + { + "epoch": 0.81, + "grad_norm": 1.7881496019767595, + "learning_rate": 9.388995963923092e-07, + "loss": 0.5113, + "step": 11382 + }, + { + "epoch": 0.81, + "grad_norm": 1.7873421602199242, + "learning_rate": 9.382293358105582e-07, + "loss": 0.5244, + "step": 11383 + }, + { + "epoch": 0.81, + "grad_norm": 1.6869319487609227, + "learning_rate": 9.375592897852965e-07, + "loss": 0.5323, + "step": 11384 + }, + { + "epoch": 0.81, + "grad_norm": 1.608282551078421, + "learning_rate": 9.3688945835192e-07, + "loss": 0.5089, + "step": 11385 + }, + { + "epoch": 0.81, + "grad_norm": 0.7169645751096076, + "learning_rate": 9.36219841545809e-07, + "loss": 0.4172, + "step": 11386 + }, + { + "epoch": 0.81, + "grad_norm": 1.8531746311695307, + "learning_rate": 9.355504394023357e-07, + "loss": 0.508, + "step": 11387 + }, + { + "epoch": 0.81, + "grad_norm": 1.855111064806148, + "learning_rate": 9.348812519568601e-07, + "loss": 0.5412, + "step": 11388 + }, + { + "epoch": 0.81, + "grad_norm": 1.699777286853456, + "learning_rate": 9.342122792447317e-07, + "loss": 0.5072, + "step": 11389 + }, + { + "epoch": 0.81, + "grad_norm": 1.750406852364639, + "learning_rate": 9.335435213012883e-07, + "loss": 0.5282, + "step": 11390 + }, + { + "epoch": 0.81, + "grad_norm": 1.7411926031218778, + "learning_rate": 9.328749781618529e-07, + "loss": 0.505, + "step": 11391 + }, + { + "epoch": 0.81, + "grad_norm": 1.547791745381671, + "learning_rate": 9.322066498617449e-07, + "loss": 0.5181, + "step": 11392 + }, + { + "epoch": 0.81, + "grad_norm": 1.6132651709581736, + "learning_rate": 9.315385364362639e-07, + "loss": 0.5349, + "step": 11393 + }, + { + "epoch": 0.81, + "grad_norm": 1.8259910066071368, + "learning_rate": 9.308706379207033e-07, + "loss": 0.495, + "step": 11394 + }, + { + "epoch": 0.81, + "grad_norm": 1.5430699434370803, + "learning_rate": 9.302029543503438e-07, + "loss": 0.5106, + "step": 11395 + }, + { + "epoch": 0.81, + "grad_norm": 1.4908708370028025, + "learning_rate": 9.29535485760456e-07, + "loss": 0.5016, + "step": 11396 + }, + { + "epoch": 0.81, + "grad_norm": 1.7606120089248032, + "learning_rate": 9.288682321862952e-07, + "loss": 0.5118, + "step": 11397 + }, + { + "epoch": 0.81, + "grad_norm": 1.8492275677524488, + "learning_rate": 9.282011936631074e-07, + "loss": 0.5412, + "step": 11398 + }, + { + "epoch": 0.81, + "grad_norm": 1.9763270715124037, + "learning_rate": 9.27534370226133e-07, + "loss": 0.416, + "step": 11399 + }, + { + "epoch": 0.81, + "grad_norm": 2.7847722348253394, + "learning_rate": 9.268677619105909e-07, + "loss": 0.5598, + "step": 11400 + }, + { + "epoch": 0.81, + "grad_norm": 2.0868305956048934, + "learning_rate": 9.262013687516963e-07, + "loss": 0.4431, + "step": 11401 + }, + { + "epoch": 0.81, + "grad_norm": 1.8060922511848032, + "learning_rate": 9.255351907846471e-07, + "loss": 0.5341, + "step": 11402 + }, + { + "epoch": 0.81, + "grad_norm": 1.7090484923204894, + "learning_rate": 9.248692280446375e-07, + "loss": 0.5538, + "step": 11403 + }, + { + "epoch": 0.81, + "grad_norm": 0.7228495982581072, + "learning_rate": 9.242034805668431e-07, + "loss": 0.4267, + "step": 11404 + }, + { + "epoch": 0.81, + "grad_norm": 1.611887104930997, + "learning_rate": 9.23537948386431e-07, + "loss": 0.4945, + "step": 11405 + }, + { + "epoch": 0.81, + "grad_norm": 1.7023374711029844, + "learning_rate": 9.228726315385578e-07, + "loss": 0.4779, + "step": 11406 + }, + { + "epoch": 0.81, + "grad_norm": 1.5861331149525635, + "learning_rate": 9.222075300583666e-07, + "loss": 0.5165, + "step": 11407 + }, + { + "epoch": 0.81, + "grad_norm": 1.563240728054472, + "learning_rate": 9.215426439809932e-07, + "loss": 0.4644, + "step": 11408 + }, + { + "epoch": 0.81, + "grad_norm": 2.2715797945667107, + "learning_rate": 9.208779733415557e-07, + "loss": 0.4654, + "step": 11409 + }, + { + "epoch": 0.81, + "grad_norm": 1.9585246257289508, + "learning_rate": 9.202135181751654e-07, + "loss": 0.557, + "step": 11410 + }, + { + "epoch": 0.81, + "grad_norm": 1.5293403027352221, + "learning_rate": 9.195492785169208e-07, + "loss": 0.4981, + "step": 11411 + }, + { + "epoch": 0.81, + "grad_norm": 1.7787224817853278, + "learning_rate": 9.188852544019105e-07, + "loss": 0.5752, + "step": 11412 + }, + { + "epoch": 0.81, + "grad_norm": 1.7716955244654948, + "learning_rate": 9.182214458652095e-07, + "loss": 0.5403, + "step": 11413 + }, + { + "epoch": 0.81, + "grad_norm": 1.8253558377806711, + "learning_rate": 9.175578529418828e-07, + "loss": 0.5443, + "step": 11414 + }, + { + "epoch": 0.81, + "grad_norm": 1.7286096604667267, + "learning_rate": 9.168944756669845e-07, + "loss": 0.5297, + "step": 11415 + }, + { + "epoch": 0.81, + "grad_norm": 0.6843084230179324, + "learning_rate": 9.162313140755541e-07, + "loss": 0.4091, + "step": 11416 + }, + { + "epoch": 0.81, + "grad_norm": 0.658878938244442, + "learning_rate": 9.155683682026239e-07, + "loss": 0.4212, + "step": 11417 + }, + { + "epoch": 0.81, + "grad_norm": 1.700426736796978, + "learning_rate": 9.149056380832122e-07, + "loss": 0.5235, + "step": 11418 + }, + { + "epoch": 0.81, + "grad_norm": 1.5615117119993294, + "learning_rate": 9.142431237523269e-07, + "loss": 0.4906, + "step": 11419 + }, + { + "epoch": 0.81, + "grad_norm": 1.7946811734534318, + "learning_rate": 9.135808252449651e-07, + "loss": 0.4807, + "step": 11420 + }, + { + "epoch": 0.81, + "grad_norm": 2.1244915772021558, + "learning_rate": 9.12918742596109e-07, + "loss": 0.5546, + "step": 11421 + }, + { + "epoch": 0.81, + "grad_norm": 3.092374141488237, + "learning_rate": 9.122568758407358e-07, + "loss": 0.5012, + "step": 11422 + }, + { + "epoch": 0.81, + "grad_norm": 2.105735506070524, + "learning_rate": 9.115952250138043e-07, + "loss": 0.5561, + "step": 11423 + }, + { + "epoch": 0.81, + "grad_norm": 1.6100361746014882, + "learning_rate": 9.10933790150268e-07, + "loss": 0.4793, + "step": 11424 + }, + { + "epoch": 0.81, + "grad_norm": 2.116538221484533, + "learning_rate": 9.102725712850619e-07, + "loss": 0.5695, + "step": 11425 + }, + { + "epoch": 0.81, + "grad_norm": 1.9736229655512154, + "learning_rate": 9.096115684531176e-07, + "loss": 0.4861, + "step": 11426 + }, + { + "epoch": 0.81, + "grad_norm": 5.439199921776878, + "learning_rate": 9.089507816893517e-07, + "loss": 0.5289, + "step": 11427 + }, + { + "epoch": 0.81, + "grad_norm": 1.5160212969197129, + "learning_rate": 9.082902110286656e-07, + "loss": 0.4525, + "step": 11428 + }, + { + "epoch": 0.81, + "grad_norm": 2.5851088918676406, + "learning_rate": 9.076298565059576e-07, + "loss": 0.4999, + "step": 11429 + }, + { + "epoch": 0.81, + "grad_norm": 1.8531147122537543, + "learning_rate": 9.069697181561061e-07, + "loss": 0.5475, + "step": 11430 + }, + { + "epoch": 0.81, + "grad_norm": 2.050832444769667, + "learning_rate": 9.063097960139844e-07, + "loss": 0.6065, + "step": 11431 + }, + { + "epoch": 0.81, + "grad_norm": 2.803483367721511, + "learning_rate": 9.056500901144477e-07, + "loss": 0.5714, + "step": 11432 + }, + { + "epoch": 0.81, + "grad_norm": 1.9697589718580937, + "learning_rate": 9.049906004923498e-07, + "loss": 0.5377, + "step": 11433 + }, + { + "epoch": 0.81, + "grad_norm": 1.8302198825783438, + "learning_rate": 9.043313271825227e-07, + "loss": 0.5651, + "step": 11434 + }, + { + "epoch": 0.81, + "grad_norm": 1.5939394625571612, + "learning_rate": 9.036722702197931e-07, + "loss": 0.476, + "step": 11435 + }, + { + "epoch": 0.81, + "grad_norm": 1.6545358083012638, + "learning_rate": 9.030134296389747e-07, + "loss": 0.5205, + "step": 11436 + }, + { + "epoch": 0.81, + "grad_norm": 1.9416592647029627, + "learning_rate": 9.023548054748688e-07, + "loss": 0.4587, + "step": 11437 + }, + { + "epoch": 0.81, + "grad_norm": 1.6534329514064374, + "learning_rate": 9.016963977622684e-07, + "loss": 0.5218, + "step": 11438 + }, + { + "epoch": 0.81, + "grad_norm": 1.6850412836553035, + "learning_rate": 9.010382065359496e-07, + "loss": 0.5707, + "step": 11439 + }, + { + "epoch": 0.81, + "grad_norm": 1.7236297411331254, + "learning_rate": 9.003802318306825e-07, + "loss": 0.5618, + "step": 11440 + }, + { + "epoch": 0.81, + "grad_norm": 1.7391241322889712, + "learning_rate": 8.997224736812227e-07, + "loss": 0.5143, + "step": 11441 + }, + { + "epoch": 0.81, + "grad_norm": 5.665125675988946, + "learning_rate": 8.990649321223155e-07, + "loss": 0.4675, + "step": 11442 + }, + { + "epoch": 0.81, + "grad_norm": 1.8963080395212901, + "learning_rate": 8.984076071886944e-07, + "loss": 0.4834, + "step": 11443 + }, + { + "epoch": 0.81, + "grad_norm": 1.7768184235166375, + "learning_rate": 8.977504989150815e-07, + "loss": 0.5415, + "step": 11444 + }, + { + "epoch": 0.81, + "grad_norm": 1.6207616297188878, + "learning_rate": 8.970936073361891e-07, + "loss": 0.5057, + "step": 11445 + }, + { + "epoch": 0.81, + "grad_norm": 2.4941088358110255, + "learning_rate": 8.964369324867133e-07, + "loss": 0.444, + "step": 11446 + }, + { + "epoch": 0.81, + "grad_norm": 2.206323691941326, + "learning_rate": 8.957804744013438e-07, + "loss": 0.4645, + "step": 11447 + }, + { + "epoch": 0.81, + "grad_norm": 1.9917211199659994, + "learning_rate": 8.951242331147564e-07, + "loss": 0.5202, + "step": 11448 + }, + { + "epoch": 0.81, + "grad_norm": 1.6160296092094137, + "learning_rate": 8.944682086616163e-07, + "loss": 0.5354, + "step": 11449 + }, + { + "epoch": 0.81, + "grad_norm": 1.6780494878376804, + "learning_rate": 8.93812401076578e-07, + "loss": 0.53, + "step": 11450 + }, + { + "epoch": 0.81, + "grad_norm": 2.31782958415964, + "learning_rate": 8.931568103942801e-07, + "loss": 0.5317, + "step": 11451 + }, + { + "epoch": 0.81, + "grad_norm": 1.9703059611813696, + "learning_rate": 8.925014366493573e-07, + "loss": 0.5052, + "step": 11452 + }, + { + "epoch": 0.81, + "grad_norm": 1.6563945593461646, + "learning_rate": 8.91846279876426e-07, + "loss": 0.5841, + "step": 11453 + }, + { + "epoch": 0.81, + "grad_norm": 1.7951701273680511, + "learning_rate": 8.911913401100947e-07, + "loss": 0.4866, + "step": 11454 + }, + { + "epoch": 0.81, + "grad_norm": 1.5532410587993264, + "learning_rate": 8.905366173849589e-07, + "loss": 0.4605, + "step": 11455 + }, + { + "epoch": 0.81, + "grad_norm": 1.6650844940587148, + "learning_rate": 8.898821117356038e-07, + "loss": 0.4999, + "step": 11456 + }, + { + "epoch": 0.81, + "grad_norm": 1.656960214228648, + "learning_rate": 8.89227823196604e-07, + "loss": 0.4846, + "step": 11457 + }, + { + "epoch": 0.81, + "grad_norm": 3.992822390116767, + "learning_rate": 8.885737518025167e-07, + "loss": 0.4796, + "step": 11458 + }, + { + "epoch": 0.81, + "grad_norm": 1.7239979414563649, + "learning_rate": 8.879198975878983e-07, + "loss": 0.5789, + "step": 11459 + }, + { + "epoch": 0.81, + "grad_norm": 1.7357423401790106, + "learning_rate": 8.872662605872834e-07, + "loss": 0.5026, + "step": 11460 + }, + { + "epoch": 0.81, + "grad_norm": 1.707551336459569, + "learning_rate": 8.866128408352015e-07, + "loss": 0.51, + "step": 11461 + }, + { + "epoch": 0.81, + "grad_norm": 1.5934933300180776, + "learning_rate": 8.85959638366165e-07, + "loss": 0.4601, + "step": 11462 + }, + { + "epoch": 0.81, + "grad_norm": 1.659670916654916, + "learning_rate": 8.853066532146832e-07, + "loss": 0.5845, + "step": 11463 + }, + { + "epoch": 0.81, + "grad_norm": 1.5474338109099939, + "learning_rate": 8.846538854152448e-07, + "loss": 0.4624, + "step": 11464 + }, + { + "epoch": 0.81, + "grad_norm": 1.6079674831858195, + "learning_rate": 8.840013350023329e-07, + "loss": 0.5002, + "step": 11465 + }, + { + "epoch": 0.81, + "grad_norm": 1.8889093872796265, + "learning_rate": 8.833490020104179e-07, + "loss": 0.5344, + "step": 11466 + }, + { + "epoch": 0.81, + "grad_norm": 0.7429156056737085, + "learning_rate": 8.826968864739577e-07, + "loss": 0.4208, + "step": 11467 + }, + { + "epoch": 0.81, + "grad_norm": 1.7621530148233677, + "learning_rate": 8.820449884274001e-07, + "loss": 0.5372, + "step": 11468 + }, + { + "epoch": 0.81, + "grad_norm": 0.7679718145561856, + "learning_rate": 8.813933079051789e-07, + "loss": 0.43, + "step": 11469 + }, + { + "epoch": 0.81, + "grad_norm": 1.8348658213916487, + "learning_rate": 8.807418449417188e-07, + "loss": 0.4999, + "step": 11470 + }, + { + "epoch": 0.81, + "grad_norm": 2.01681997159082, + "learning_rate": 8.800905995714321e-07, + "loss": 0.6066, + "step": 11471 + }, + { + "epoch": 0.81, + "grad_norm": 1.9510206445409704, + "learning_rate": 8.794395718287202e-07, + "loss": 0.5249, + "step": 11472 + }, + { + "epoch": 0.81, + "grad_norm": 2.3534530468026187, + "learning_rate": 8.78788761747974e-07, + "loss": 0.5211, + "step": 11473 + }, + { + "epoch": 0.81, + "grad_norm": 1.7750022594290558, + "learning_rate": 8.781381693635671e-07, + "loss": 0.5252, + "step": 11474 + }, + { + "epoch": 0.81, + "grad_norm": 2.416616478384309, + "learning_rate": 8.77487794709872e-07, + "loss": 0.517, + "step": 11475 + }, + { + "epoch": 0.81, + "grad_norm": 0.6853277537672671, + "learning_rate": 8.768376378212384e-07, + "loss": 0.4125, + "step": 11476 + }, + { + "epoch": 0.81, + "grad_norm": 1.5516992524116755, + "learning_rate": 8.761876987320128e-07, + "loss": 0.4203, + "step": 11477 + }, + { + "epoch": 0.81, + "grad_norm": 0.723343585241408, + "learning_rate": 8.755379774765261e-07, + "loss": 0.418, + "step": 11478 + }, + { + "epoch": 0.81, + "grad_norm": 1.8529870511940283, + "learning_rate": 8.74888474089099e-07, + "loss": 0.5322, + "step": 11479 + }, + { + "epoch": 0.81, + "grad_norm": 1.5170711750233299, + "learning_rate": 8.742391886040413e-07, + "loss": 0.5327, + "step": 11480 + }, + { + "epoch": 0.81, + "grad_norm": 1.6600922148936235, + "learning_rate": 8.735901210556475e-07, + "loss": 0.4903, + "step": 11481 + }, + { + "epoch": 0.81, + "grad_norm": 2.1990118040736797, + "learning_rate": 8.729412714782082e-07, + "loss": 0.5902, + "step": 11482 + }, + { + "epoch": 0.81, + "grad_norm": 1.9267919002565872, + "learning_rate": 8.722926399059939e-07, + "loss": 0.4993, + "step": 11483 + }, + { + "epoch": 0.81, + "grad_norm": 1.813239365500518, + "learning_rate": 8.716442263732694e-07, + "loss": 0.5088, + "step": 11484 + }, + { + "epoch": 0.82, + "grad_norm": 1.7922215975103903, + "learning_rate": 8.70996030914285e-07, + "loss": 0.5502, + "step": 11485 + }, + { + "epoch": 0.82, + "grad_norm": 1.5544469128362726, + "learning_rate": 8.703480535632819e-07, + "loss": 0.5113, + "step": 11486 + }, + { + "epoch": 0.82, + "grad_norm": 1.6524990041165688, + "learning_rate": 8.697002943544886e-07, + "loss": 0.5278, + "step": 11487 + }, + { + "epoch": 0.82, + "grad_norm": 1.9136300929806342, + "learning_rate": 8.6905275332212e-07, + "loss": 0.5749, + "step": 11488 + }, + { + "epoch": 0.82, + "grad_norm": 1.6001608388819686, + "learning_rate": 8.684054305003825e-07, + "loss": 0.523, + "step": 11489 + }, + { + "epoch": 0.82, + "grad_norm": 3.5135796435306514, + "learning_rate": 8.677583259234695e-07, + "loss": 0.5798, + "step": 11490 + }, + { + "epoch": 0.82, + "grad_norm": 1.7531176205882582, + "learning_rate": 8.671114396255653e-07, + "loss": 0.4659, + "step": 11491 + }, + { + "epoch": 0.82, + "grad_norm": 2.4634712993660233, + "learning_rate": 8.66464771640837e-07, + "loss": 0.4793, + "step": 11492 + }, + { + "epoch": 0.82, + "grad_norm": 1.5767256450655862, + "learning_rate": 8.658183220034461e-07, + "loss": 0.4956, + "step": 11493 + }, + { + "epoch": 0.82, + "grad_norm": 1.6394903394199316, + "learning_rate": 8.651720907475419e-07, + "loss": 0.505, + "step": 11494 + }, + { + "epoch": 0.82, + "grad_norm": 1.687410782779506, + "learning_rate": 8.645260779072573e-07, + "loss": 0.5582, + "step": 11495 + }, + { + "epoch": 0.82, + "grad_norm": 2.173868809061579, + "learning_rate": 8.638802835167176e-07, + "loss": 0.5596, + "step": 11496 + }, + { + "epoch": 0.82, + "grad_norm": 1.707366935398623, + "learning_rate": 8.632347076100367e-07, + "loss": 0.5684, + "step": 11497 + }, + { + "epoch": 0.82, + "grad_norm": 1.5586250427886035, + "learning_rate": 8.625893502213173e-07, + "loss": 0.5454, + "step": 11498 + }, + { + "epoch": 0.82, + "grad_norm": 1.6356103383267382, + "learning_rate": 8.619442113846466e-07, + "loss": 0.5388, + "step": 11499 + }, + { + "epoch": 0.82, + "grad_norm": 2.311119373313283, + "learning_rate": 8.612992911341039e-07, + "loss": 0.5511, + "step": 11500 + }, + { + "epoch": 0.82, + "grad_norm": 0.7040610376391158, + "learning_rate": 8.606545895037565e-07, + "loss": 0.459, + "step": 11501 + }, + { + "epoch": 0.82, + "grad_norm": 3.082658154526252, + "learning_rate": 8.600101065276601e-07, + "loss": 0.519, + "step": 11502 + }, + { + "epoch": 0.82, + "grad_norm": 1.985940852463969, + "learning_rate": 8.593658422398594e-07, + "loss": 0.5403, + "step": 11503 + }, + { + "epoch": 0.82, + "grad_norm": 1.7944157440093698, + "learning_rate": 8.587217966743828e-07, + "loss": 0.4617, + "step": 11504 + }, + { + "epoch": 0.82, + "grad_norm": 2.306862298628854, + "learning_rate": 8.580779698652553e-07, + "loss": 0.5163, + "step": 11505 + }, + { + "epoch": 0.82, + "grad_norm": 1.820859079821516, + "learning_rate": 8.574343618464836e-07, + "loss": 0.5818, + "step": 11506 + }, + { + "epoch": 0.82, + "grad_norm": 1.8192055686064592, + "learning_rate": 8.567909726520657e-07, + "loss": 0.5013, + "step": 11507 + }, + { + "epoch": 0.82, + "grad_norm": 1.7073168096070146, + "learning_rate": 8.561478023159875e-07, + "loss": 0.5267, + "step": 11508 + }, + { + "epoch": 0.82, + "grad_norm": 1.750956375047228, + "learning_rate": 8.555048508722235e-07, + "loss": 0.4484, + "step": 11509 + }, + { + "epoch": 0.82, + "grad_norm": 1.6304771863865426, + "learning_rate": 8.548621183547384e-07, + "loss": 0.5345, + "step": 11510 + }, + { + "epoch": 0.82, + "grad_norm": 1.7308141324665218, + "learning_rate": 8.542196047974794e-07, + "loss": 0.4844, + "step": 11511 + }, + { + "epoch": 0.82, + "grad_norm": 1.5977599623795842, + "learning_rate": 8.535773102343909e-07, + "loss": 0.4633, + "step": 11512 + }, + { + "epoch": 0.82, + "grad_norm": 1.8062968555588848, + "learning_rate": 8.52935234699398e-07, + "loss": 0.5405, + "step": 11513 + }, + { + "epoch": 0.82, + "grad_norm": 1.5317284958786475, + "learning_rate": 8.522933782264175e-07, + "loss": 0.4734, + "step": 11514 + }, + { + "epoch": 0.82, + "grad_norm": 0.689423372575403, + "learning_rate": 8.516517408493557e-07, + "loss": 0.456, + "step": 11515 + }, + { + "epoch": 0.82, + "grad_norm": 1.461893448043425, + "learning_rate": 8.510103226021055e-07, + "loss": 0.4708, + "step": 11516 + }, + { + "epoch": 0.82, + "grad_norm": 1.422726554155183, + "learning_rate": 8.503691235185496e-07, + "loss": 0.498, + "step": 11517 + }, + { + "epoch": 0.82, + "grad_norm": 1.9341917794590062, + "learning_rate": 8.497281436325566e-07, + "loss": 0.4984, + "step": 11518 + }, + { + "epoch": 0.82, + "grad_norm": 1.9418836736921243, + "learning_rate": 8.490873829779855e-07, + "loss": 0.5049, + "step": 11519 + }, + { + "epoch": 0.82, + "grad_norm": 1.9655926574334055, + "learning_rate": 8.484468415886848e-07, + "loss": 0.5143, + "step": 11520 + }, + { + "epoch": 0.82, + "grad_norm": 1.629822552308648, + "learning_rate": 8.478065194984892e-07, + "loss": 0.5212, + "step": 11521 + }, + { + "epoch": 0.82, + "grad_norm": 2.4831620859711565, + "learning_rate": 8.471664167412236e-07, + "loss": 0.5015, + "step": 11522 + }, + { + "epoch": 0.82, + "grad_norm": 1.6858985873627381, + "learning_rate": 8.465265333506972e-07, + "loss": 0.5041, + "step": 11523 + }, + { + "epoch": 0.82, + "grad_norm": 2.3919290090708385, + "learning_rate": 8.458868693607158e-07, + "loss": 0.5583, + "step": 11524 + }, + { + "epoch": 0.82, + "grad_norm": 1.5324542921633564, + "learning_rate": 8.452474248050646e-07, + "loss": 0.4981, + "step": 11525 + }, + { + "epoch": 0.82, + "grad_norm": 1.9272217873365456, + "learning_rate": 8.446081997175232e-07, + "loss": 0.5556, + "step": 11526 + }, + { + "epoch": 0.82, + "grad_norm": 1.7172861798504175, + "learning_rate": 8.439691941318567e-07, + "loss": 0.4744, + "step": 11527 + }, + { + "epoch": 0.82, + "grad_norm": 1.9780276446129526, + "learning_rate": 8.433304080818211e-07, + "loss": 0.5384, + "step": 11528 + }, + { + "epoch": 0.82, + "grad_norm": 2.0104438520906216, + "learning_rate": 8.42691841601157e-07, + "loss": 0.4952, + "step": 11529 + }, + { + "epoch": 0.82, + "grad_norm": 1.7761761015789197, + "learning_rate": 8.420534947235953e-07, + "loss": 0.5429, + "step": 11530 + }, + { + "epoch": 0.82, + "grad_norm": 1.759222397283599, + "learning_rate": 8.414153674828596e-07, + "loss": 0.474, + "step": 11531 + }, + { + "epoch": 0.82, + "grad_norm": 2.2875978147326923, + "learning_rate": 8.407774599126539e-07, + "loss": 0.4985, + "step": 11532 + }, + { + "epoch": 0.82, + "grad_norm": 1.7878639170486843, + "learning_rate": 8.401397720466775e-07, + "loss": 0.4295, + "step": 11533 + }, + { + "epoch": 0.82, + "grad_norm": 0.7807324431314969, + "learning_rate": 8.395023039186117e-07, + "loss": 0.4311, + "step": 11534 + }, + { + "epoch": 0.82, + "grad_norm": 1.4920564002064747, + "learning_rate": 8.388650555621342e-07, + "loss": 0.4513, + "step": 11535 + }, + { + "epoch": 0.82, + "grad_norm": 10.182404370683514, + "learning_rate": 8.382280270109028e-07, + "loss": 0.5202, + "step": 11536 + }, + { + "epoch": 0.82, + "grad_norm": 1.5848791832093747, + "learning_rate": 8.375912182985696e-07, + "loss": 0.514, + "step": 11537 + }, + { + "epoch": 0.82, + "grad_norm": 2.494739828001238, + "learning_rate": 8.369546294587722e-07, + "loss": 0.5374, + "step": 11538 + }, + { + "epoch": 0.82, + "grad_norm": 2.151964869414437, + "learning_rate": 8.363182605251375e-07, + "loss": 0.5471, + "step": 11539 + }, + { + "epoch": 0.82, + "grad_norm": 1.706957449465364, + "learning_rate": 8.356821115312818e-07, + "loss": 0.4943, + "step": 11540 + }, + { + "epoch": 0.82, + "grad_norm": 1.8028750027748284, + "learning_rate": 8.350461825108053e-07, + "loss": 0.5558, + "step": 11541 + }, + { + "epoch": 0.82, + "grad_norm": 1.8784423025197514, + "learning_rate": 8.344104734973047e-07, + "loss": 0.5587, + "step": 11542 + }, + { + "epoch": 0.82, + "grad_norm": 2.216868332789322, + "learning_rate": 8.337749845243564e-07, + "loss": 0.509, + "step": 11543 + }, + { + "epoch": 0.82, + "grad_norm": 2.5583507085985717, + "learning_rate": 8.331397156255311e-07, + "loss": 0.4449, + "step": 11544 + }, + { + "epoch": 0.82, + "grad_norm": 1.6668821286616307, + "learning_rate": 8.32504666834385e-07, + "loss": 0.5473, + "step": 11545 + }, + { + "epoch": 0.82, + "grad_norm": 1.5236560093457887, + "learning_rate": 8.318698381844637e-07, + "loss": 0.5622, + "step": 11546 + }, + { + "epoch": 0.82, + "grad_norm": 1.9576424583108563, + "learning_rate": 8.312352297093024e-07, + "loss": 0.5772, + "step": 11547 + }, + { + "epoch": 0.82, + "grad_norm": 1.7300432879184895, + "learning_rate": 8.306008414424205e-07, + "loss": 0.5249, + "step": 11548 + }, + { + "epoch": 0.82, + "grad_norm": 1.9168756159085125, + "learning_rate": 8.299666734173301e-07, + "loss": 0.521, + "step": 11549 + }, + { + "epoch": 0.82, + "grad_norm": 1.5301044041229603, + "learning_rate": 8.293327256675299e-07, + "loss": 0.4391, + "step": 11550 + }, + { + "epoch": 0.82, + "grad_norm": 2.092994392897207, + "learning_rate": 8.286989982265076e-07, + "loss": 0.5878, + "step": 11551 + }, + { + "epoch": 0.82, + "grad_norm": 1.9989058805350945, + "learning_rate": 8.280654911277397e-07, + "loss": 0.5343, + "step": 11552 + }, + { + "epoch": 0.82, + "grad_norm": 1.7247896784974281, + "learning_rate": 8.274322044046868e-07, + "loss": 0.5484, + "step": 11553 + }, + { + "epoch": 0.82, + "grad_norm": 1.6367108020107815, + "learning_rate": 8.267991380908052e-07, + "loss": 0.5043, + "step": 11554 + }, + { + "epoch": 0.82, + "grad_norm": 1.8115965964767373, + "learning_rate": 8.261662922195329e-07, + "loss": 0.528, + "step": 11555 + }, + { + "epoch": 0.82, + "grad_norm": 2.040919127651128, + "learning_rate": 8.255336668243003e-07, + "loss": 0.5321, + "step": 11556 + }, + { + "epoch": 0.82, + "grad_norm": 1.6971381779736305, + "learning_rate": 8.249012619385238e-07, + "loss": 0.5059, + "step": 11557 + }, + { + "epoch": 0.82, + "grad_norm": 1.9282287982360757, + "learning_rate": 8.242690775956102e-07, + "loss": 0.5388, + "step": 11558 + }, + { + "epoch": 0.82, + "grad_norm": 1.8168832923298792, + "learning_rate": 8.236371138289545e-07, + "loss": 0.5172, + "step": 11559 + }, + { + "epoch": 0.82, + "grad_norm": 1.6344013476910677, + "learning_rate": 8.230053706719355e-07, + "loss": 0.512, + "step": 11560 + }, + { + "epoch": 0.82, + "grad_norm": 1.8039543126779392, + "learning_rate": 8.223738481579285e-07, + "loss": 0.5693, + "step": 11561 + }, + { + "epoch": 0.82, + "grad_norm": 1.5692278929487165, + "learning_rate": 8.217425463202899e-07, + "loss": 0.5551, + "step": 11562 + }, + { + "epoch": 0.82, + "grad_norm": 1.5922385459637651, + "learning_rate": 8.211114651923685e-07, + "loss": 0.4918, + "step": 11563 + }, + { + "epoch": 0.82, + "grad_norm": 1.5218168202771971, + "learning_rate": 8.204806048074975e-07, + "loss": 0.4982, + "step": 11564 + }, + { + "epoch": 0.82, + "grad_norm": 1.823193271348865, + "learning_rate": 8.198499651990055e-07, + "loss": 0.5585, + "step": 11565 + }, + { + "epoch": 0.82, + "grad_norm": 0.633560106202617, + "learning_rate": 8.192195464002011e-07, + "loss": 0.4236, + "step": 11566 + }, + { + "epoch": 0.82, + "grad_norm": 1.8244940176775135, + "learning_rate": 8.185893484443868e-07, + "loss": 0.5012, + "step": 11567 + }, + { + "epoch": 0.82, + "grad_norm": 2.0991864557002105, + "learning_rate": 8.179593713648521e-07, + "loss": 0.571, + "step": 11568 + }, + { + "epoch": 0.82, + "grad_norm": 1.6806775986254028, + "learning_rate": 8.173296151948734e-07, + "loss": 0.5234, + "step": 11569 + }, + { + "epoch": 0.82, + "grad_norm": 1.5381687093239398, + "learning_rate": 8.167000799677189e-07, + "loss": 0.4998, + "step": 11570 + }, + { + "epoch": 0.82, + "grad_norm": 1.9017646926837435, + "learning_rate": 8.160707657166389e-07, + "loss": 0.4939, + "step": 11571 + }, + { + "epoch": 0.82, + "grad_norm": 1.447012124211361, + "learning_rate": 8.154416724748798e-07, + "loss": 0.4716, + "step": 11572 + }, + { + "epoch": 0.82, + "grad_norm": 2.036569453031018, + "learning_rate": 8.148128002756705e-07, + "loss": 0.5719, + "step": 11573 + }, + { + "epoch": 0.82, + "grad_norm": 1.5365458957759495, + "learning_rate": 8.141841491522296e-07, + "loss": 0.5018, + "step": 11574 + }, + { + "epoch": 0.82, + "grad_norm": 1.5244753900794081, + "learning_rate": 8.135557191377658e-07, + "loss": 0.5149, + "step": 11575 + }, + { + "epoch": 0.82, + "grad_norm": 1.638326131750024, + "learning_rate": 8.129275102654749e-07, + "loss": 0.4747, + "step": 11576 + }, + { + "epoch": 0.82, + "grad_norm": 1.558480860457365, + "learning_rate": 8.122995225685415e-07, + "loss": 0.5375, + "step": 11577 + }, + { + "epoch": 0.82, + "grad_norm": 0.6920347985933656, + "learning_rate": 8.116717560801357e-07, + "loss": 0.4196, + "step": 11578 + }, + { + "epoch": 0.82, + "grad_norm": 1.7716018719761366, + "learning_rate": 8.110442108334199e-07, + "loss": 0.5003, + "step": 11579 + }, + { + "epoch": 0.82, + "grad_norm": 1.7713580366394237, + "learning_rate": 8.104168868615431e-07, + "loss": 0.526, + "step": 11580 + }, + { + "epoch": 0.82, + "grad_norm": 1.7611812733122423, + "learning_rate": 8.097897841976426e-07, + "loss": 0.5163, + "step": 11581 + }, + { + "epoch": 0.82, + "grad_norm": 1.4564373158717063, + "learning_rate": 8.091629028748454e-07, + "loss": 0.4693, + "step": 11582 + }, + { + "epoch": 0.82, + "grad_norm": 1.9603695796111504, + "learning_rate": 8.085362429262617e-07, + "loss": 0.5753, + "step": 11583 + }, + { + "epoch": 0.82, + "grad_norm": 0.7165583835320107, + "learning_rate": 8.079098043849981e-07, + "loss": 0.429, + "step": 11584 + }, + { + "epoch": 0.82, + "grad_norm": 1.5963338897454797, + "learning_rate": 8.072835872841428e-07, + "loss": 0.5743, + "step": 11585 + }, + { + "epoch": 0.82, + "grad_norm": 1.5930881542000306, + "learning_rate": 8.066575916567754e-07, + "loss": 0.4815, + "step": 11586 + }, + { + "epoch": 0.82, + "grad_norm": 1.762880942878839, + "learning_rate": 8.060318175359627e-07, + "loss": 0.4622, + "step": 11587 + }, + { + "epoch": 0.82, + "grad_norm": 1.6626947187831724, + "learning_rate": 8.054062649547606e-07, + "loss": 0.5603, + "step": 11588 + }, + { + "epoch": 0.82, + "grad_norm": 1.7182211689187945, + "learning_rate": 8.047809339462137e-07, + "loss": 0.5796, + "step": 11589 + }, + { + "epoch": 0.82, + "grad_norm": 1.5245330356016016, + "learning_rate": 8.041558245433512e-07, + "loss": 0.5426, + "step": 11590 + }, + { + "epoch": 0.82, + "grad_norm": 1.6512528106432813, + "learning_rate": 8.035309367791977e-07, + "loss": 0.5161, + "step": 11591 + }, + { + "epoch": 0.82, + "grad_norm": 1.548290467019442, + "learning_rate": 8.029062706867585e-07, + "loss": 0.5379, + "step": 11592 + }, + { + "epoch": 0.82, + "grad_norm": 1.6895400350456444, + "learning_rate": 8.022818262990329e-07, + "loss": 0.5261, + "step": 11593 + }, + { + "epoch": 0.82, + "grad_norm": 1.540972705674565, + "learning_rate": 8.016576036490026e-07, + "loss": 0.4474, + "step": 11594 + }, + { + "epoch": 0.82, + "grad_norm": 1.5222165027930799, + "learning_rate": 8.010336027696464e-07, + "loss": 0.5043, + "step": 11595 + }, + { + "epoch": 0.82, + "grad_norm": 1.9340380376426691, + "learning_rate": 8.004098236939217e-07, + "loss": 0.4998, + "step": 11596 + }, + { + "epoch": 0.82, + "grad_norm": 1.5827584122072866, + "learning_rate": 7.997862664547807e-07, + "loss": 0.4416, + "step": 11597 + }, + { + "epoch": 0.82, + "grad_norm": 1.533233801278073, + "learning_rate": 7.991629310851612e-07, + "loss": 0.5443, + "step": 11598 + }, + { + "epoch": 0.82, + "grad_norm": 1.9405392941711468, + "learning_rate": 7.985398176179898e-07, + "loss": 0.5162, + "step": 11599 + }, + { + "epoch": 0.82, + "grad_norm": 1.6719826690154074, + "learning_rate": 7.979169260861835e-07, + "loss": 0.531, + "step": 11600 + }, + { + "epoch": 0.82, + "grad_norm": 0.6516388496123298, + "learning_rate": 7.972942565226422e-07, + "loss": 0.4382, + "step": 11601 + }, + { + "epoch": 0.82, + "grad_norm": 6.206022581514963, + "learning_rate": 7.966718089602587e-07, + "loss": 0.5562, + "step": 11602 + }, + { + "epoch": 0.82, + "grad_norm": 0.6926899604236465, + "learning_rate": 7.960495834319132e-07, + "loss": 0.432, + "step": 11603 + }, + { + "epoch": 0.82, + "grad_norm": 1.6910516028015816, + "learning_rate": 7.954275799704742e-07, + "loss": 0.551, + "step": 11604 + }, + { + "epoch": 0.82, + "grad_norm": 1.4081090889849808, + "learning_rate": 7.948057986087971e-07, + "loss": 0.4868, + "step": 11605 + }, + { + "epoch": 0.82, + "grad_norm": 1.821448545448793, + "learning_rate": 7.941842393797266e-07, + "loss": 0.5431, + "step": 11606 + }, + { + "epoch": 0.82, + "grad_norm": 1.5977710457350438, + "learning_rate": 7.935629023160974e-07, + "loss": 0.5103, + "step": 11607 + }, + { + "epoch": 0.82, + "grad_norm": 2.3195857929261297, + "learning_rate": 7.929417874507272e-07, + "loss": 0.4837, + "step": 11608 + }, + { + "epoch": 0.82, + "grad_norm": 1.7172181770928112, + "learning_rate": 7.92320894816428e-07, + "loss": 0.5664, + "step": 11609 + }, + { + "epoch": 0.82, + "grad_norm": 1.8121893801856221, + "learning_rate": 7.917002244459965e-07, + "loss": 0.5004, + "step": 11610 + }, + { + "epoch": 0.82, + "grad_norm": 1.8091082745591047, + "learning_rate": 7.910797763722189e-07, + "loss": 0.4846, + "step": 11611 + }, + { + "epoch": 0.82, + "grad_norm": 0.6831671339817541, + "learning_rate": 7.904595506278701e-07, + "loss": 0.3926, + "step": 11612 + }, + { + "epoch": 0.82, + "grad_norm": 1.5865678877004648, + "learning_rate": 7.898395472457094e-07, + "loss": 0.5601, + "step": 11613 + }, + { + "epoch": 0.82, + "grad_norm": 7.083476568796669, + "learning_rate": 7.892197662584921e-07, + "loss": 0.4876, + "step": 11614 + }, + { + "epoch": 0.82, + "grad_norm": 3.065362923699742, + "learning_rate": 7.886002076989541e-07, + "loss": 0.4968, + "step": 11615 + }, + { + "epoch": 0.82, + "grad_norm": 2.134446674871776, + "learning_rate": 7.879808715998233e-07, + "loss": 0.5222, + "step": 11616 + }, + { + "epoch": 0.82, + "grad_norm": 1.7318602253573585, + "learning_rate": 7.873617579938148e-07, + "loss": 0.5781, + "step": 11617 + }, + { + "epoch": 0.82, + "grad_norm": 2.2824437068092203, + "learning_rate": 7.867428669136324e-07, + "loss": 0.554, + "step": 11618 + }, + { + "epoch": 0.82, + "grad_norm": 3.76347032225722, + "learning_rate": 7.861241983919699e-07, + "loss": 0.4886, + "step": 11619 + }, + { + "epoch": 0.82, + "grad_norm": 0.768536978835744, + "learning_rate": 7.855057524615029e-07, + "loss": 0.4298, + "step": 11620 + }, + { + "epoch": 0.82, + "grad_norm": 1.5351437618232844, + "learning_rate": 7.848875291549058e-07, + "loss": 0.5141, + "step": 11621 + }, + { + "epoch": 0.82, + "grad_norm": 1.9998997757821406, + "learning_rate": 7.842695285048302e-07, + "loss": 0.5117, + "step": 11622 + }, + { + "epoch": 0.82, + "grad_norm": 1.5340231765478325, + "learning_rate": 7.836517505439245e-07, + "loss": 0.5177, + "step": 11623 + }, + { + "epoch": 0.82, + "grad_norm": 1.5633604037210223, + "learning_rate": 7.830341953048182e-07, + "loss": 0.5362, + "step": 11624 + }, + { + "epoch": 0.82, + "grad_norm": 1.7255333076395678, + "learning_rate": 7.824168628201356e-07, + "loss": 0.5331, + "step": 11625 + }, + { + "epoch": 0.83, + "grad_norm": 1.730343933641218, + "learning_rate": 7.817997531224864e-07, + "loss": 0.5212, + "step": 11626 + }, + { + "epoch": 0.83, + "grad_norm": 2.0319644153570278, + "learning_rate": 7.811828662444664e-07, + "loss": 0.5393, + "step": 11627 + }, + { + "epoch": 0.83, + "grad_norm": 2.3741106493630455, + "learning_rate": 7.805662022186633e-07, + "loss": 0.5455, + "step": 11628 + }, + { + "epoch": 0.83, + "grad_norm": 0.6876200671960324, + "learning_rate": 7.799497610776502e-07, + "loss": 0.4272, + "step": 11629 + }, + { + "epoch": 0.83, + "grad_norm": 1.5761733020522175, + "learning_rate": 7.793335428539917e-07, + "loss": 0.5094, + "step": 11630 + }, + { + "epoch": 0.83, + "grad_norm": 1.5652783913939403, + "learning_rate": 7.787175475802355e-07, + "loss": 0.4975, + "step": 11631 + }, + { + "epoch": 0.83, + "grad_norm": 1.75350994889242, + "learning_rate": 7.78101775288922e-07, + "loss": 0.4947, + "step": 11632 + }, + { + "epoch": 0.83, + "grad_norm": 1.911594606584196, + "learning_rate": 7.774862260125787e-07, + "loss": 0.5336, + "step": 11633 + }, + { + "epoch": 0.83, + "grad_norm": 2.4189484961837233, + "learning_rate": 7.76870899783721e-07, + "loss": 0.4802, + "step": 11634 + }, + { + "epoch": 0.83, + "grad_norm": 1.7074099623664252, + "learning_rate": 7.762557966348522e-07, + "loss": 0.5423, + "step": 11635 + }, + { + "epoch": 0.83, + "grad_norm": 2.9068816906762174, + "learning_rate": 7.75640916598464e-07, + "loss": 0.4737, + "step": 11636 + }, + { + "epoch": 0.83, + "grad_norm": 1.7653720106601511, + "learning_rate": 7.750262597070379e-07, + "loss": 0.5738, + "step": 11637 + }, + { + "epoch": 0.83, + "grad_norm": 1.599249729705426, + "learning_rate": 7.7441182599304e-07, + "loss": 0.4909, + "step": 11638 + }, + { + "epoch": 0.83, + "grad_norm": 1.712011193510669, + "learning_rate": 7.73797615488927e-07, + "loss": 0.5507, + "step": 11639 + }, + { + "epoch": 0.83, + "grad_norm": 1.9836283556759475, + "learning_rate": 7.731836282271449e-07, + "loss": 0.6292, + "step": 11640 + }, + { + "epoch": 0.83, + "grad_norm": 0.6742657937245757, + "learning_rate": 7.725698642401258e-07, + "loss": 0.4303, + "step": 11641 + }, + { + "epoch": 0.83, + "grad_norm": 1.8265637047221108, + "learning_rate": 7.719563235602922e-07, + "loss": 0.5418, + "step": 11642 + }, + { + "epoch": 0.83, + "grad_norm": 1.9144035754325404, + "learning_rate": 7.7134300622005e-07, + "loss": 0.5406, + "step": 11643 + }, + { + "epoch": 0.83, + "grad_norm": 2.396229602480046, + "learning_rate": 7.707299122518008e-07, + "loss": 0.5448, + "step": 11644 + }, + { + "epoch": 0.83, + "grad_norm": 2.203802831481613, + "learning_rate": 7.701170416879273e-07, + "loss": 0.4956, + "step": 11645 + }, + { + "epoch": 0.83, + "grad_norm": 1.7071202008390076, + "learning_rate": 7.695043945608049e-07, + "loss": 0.4935, + "step": 11646 + }, + { + "epoch": 0.83, + "grad_norm": 1.579683455180068, + "learning_rate": 7.688919709027948e-07, + "loss": 0.5138, + "step": 11647 + }, + { + "epoch": 0.83, + "grad_norm": 2.2847832749340427, + "learning_rate": 7.682797707462475e-07, + "loss": 0.5756, + "step": 11648 + }, + { + "epoch": 0.83, + "grad_norm": 1.6810715770467088, + "learning_rate": 7.676677941235034e-07, + "loss": 0.4561, + "step": 11649 + }, + { + "epoch": 0.83, + "grad_norm": 1.646865140092308, + "learning_rate": 7.670560410668865e-07, + "loss": 0.5434, + "step": 11650 + }, + { + "epoch": 0.83, + "grad_norm": 2.040736753589956, + "learning_rate": 7.664445116087122e-07, + "loss": 0.5095, + "step": 11651 + }, + { + "epoch": 0.83, + "grad_norm": 1.7544003330955769, + "learning_rate": 7.658332057812839e-07, + "loss": 0.4945, + "step": 11652 + }, + { + "epoch": 0.83, + "grad_norm": 1.5459964850226318, + "learning_rate": 7.652221236168933e-07, + "loss": 0.4803, + "step": 11653 + }, + { + "epoch": 0.83, + "grad_norm": 1.866088022518965, + "learning_rate": 7.646112651478199e-07, + "loss": 0.5038, + "step": 11654 + }, + { + "epoch": 0.83, + "grad_norm": 3.7668379754432606, + "learning_rate": 7.640006304063302e-07, + "loss": 0.5736, + "step": 11655 + }, + { + "epoch": 0.83, + "grad_norm": 1.4672347136553276, + "learning_rate": 7.633902194246823e-07, + "loss": 0.5311, + "step": 11656 + }, + { + "epoch": 0.83, + "grad_norm": 1.7503436442404559, + "learning_rate": 7.627800322351175e-07, + "loss": 0.5341, + "step": 11657 + }, + { + "epoch": 0.83, + "grad_norm": 1.46854657094346, + "learning_rate": 7.621700688698691e-07, + "loss": 0.4446, + "step": 11658 + }, + { + "epoch": 0.83, + "grad_norm": 1.6209562288034756, + "learning_rate": 7.615603293611568e-07, + "loss": 0.5375, + "step": 11659 + }, + { + "epoch": 0.83, + "grad_norm": 1.6191683804996029, + "learning_rate": 7.609508137411914e-07, + "loss": 0.4721, + "step": 11660 + }, + { + "epoch": 0.83, + "grad_norm": 2.3084684252873737, + "learning_rate": 7.603415220421667e-07, + "loss": 0.5152, + "step": 11661 + }, + { + "epoch": 0.83, + "grad_norm": 1.688203648679798, + "learning_rate": 7.597324542962676e-07, + "loss": 0.4817, + "step": 11662 + }, + { + "epoch": 0.83, + "grad_norm": 1.6534277400131838, + "learning_rate": 7.591236105356703e-07, + "loss": 0.5285, + "step": 11663 + }, + { + "epoch": 0.83, + "grad_norm": 2.1415265862650403, + "learning_rate": 7.585149907925327e-07, + "loss": 0.5078, + "step": 11664 + }, + { + "epoch": 0.83, + "grad_norm": 0.6438326529136835, + "learning_rate": 7.579065950990072e-07, + "loss": 0.3849, + "step": 11665 + }, + { + "epoch": 0.83, + "grad_norm": 1.8160514703506496, + "learning_rate": 7.572984234872266e-07, + "loss": 0.4987, + "step": 11666 + }, + { + "epoch": 0.83, + "grad_norm": 1.9876701666238215, + "learning_rate": 7.566904759893217e-07, + "loss": 0.574, + "step": 11667 + }, + { + "epoch": 0.83, + "grad_norm": 1.5309174226186526, + "learning_rate": 7.560827526374037e-07, + "loss": 0.5258, + "step": 11668 + }, + { + "epoch": 0.83, + "grad_norm": 1.5382654283677692, + "learning_rate": 7.554752534635745e-07, + "loss": 0.4683, + "step": 11669 + }, + { + "epoch": 0.83, + "grad_norm": 0.7456717723821226, + "learning_rate": 7.548679784999253e-07, + "loss": 0.4317, + "step": 11670 + }, + { + "epoch": 0.83, + "grad_norm": 1.85388900257951, + "learning_rate": 7.542609277785335e-07, + "loss": 0.563, + "step": 11671 + }, + { + "epoch": 0.83, + "grad_norm": 0.6245303467254607, + "learning_rate": 7.536541013314669e-07, + "loss": 0.4372, + "step": 11672 + }, + { + "epoch": 0.83, + "grad_norm": 1.5765350339689714, + "learning_rate": 7.530474991907777e-07, + "loss": 0.5391, + "step": 11673 + }, + { + "epoch": 0.83, + "grad_norm": 1.5800575851806558, + "learning_rate": 7.524411213885119e-07, + "loss": 0.5124, + "step": 11674 + }, + { + "epoch": 0.83, + "grad_norm": 1.5159862773659922, + "learning_rate": 7.518349679566977e-07, + "loss": 0.5266, + "step": 11675 + }, + { + "epoch": 0.83, + "grad_norm": 1.7538921323799161, + "learning_rate": 7.512290389273558e-07, + "loss": 0.5369, + "step": 11676 + }, + { + "epoch": 0.83, + "grad_norm": 1.7768138885011255, + "learning_rate": 7.506233343324925e-07, + "loss": 0.5111, + "step": 11677 + }, + { + "epoch": 0.83, + "grad_norm": 1.7931886403188386, + "learning_rate": 7.500178542041037e-07, + "loss": 0.526, + "step": 11678 + }, + { + "epoch": 0.83, + "grad_norm": 2.4218894899436174, + "learning_rate": 7.494125985741735e-07, + "loss": 0.513, + "step": 11679 + }, + { + "epoch": 0.83, + "grad_norm": 1.5639437351817747, + "learning_rate": 7.48807567474672e-07, + "loss": 0.5349, + "step": 11680 + }, + { + "epoch": 0.83, + "grad_norm": 2.3311878373056234, + "learning_rate": 7.482027609375597e-07, + "loss": 0.5169, + "step": 11681 + }, + { + "epoch": 0.83, + "grad_norm": 1.6852592687408217, + "learning_rate": 7.475981789947845e-07, + "loss": 0.5484, + "step": 11682 + }, + { + "epoch": 0.83, + "grad_norm": 2.1266636588728067, + "learning_rate": 7.46993821678283e-07, + "loss": 0.5882, + "step": 11683 + }, + { + "epoch": 0.83, + "grad_norm": 2.5396118765331153, + "learning_rate": 7.463896890199785e-07, + "loss": 0.5134, + "step": 11684 + }, + { + "epoch": 0.83, + "grad_norm": 1.4796675396505603, + "learning_rate": 7.457857810517838e-07, + "loss": 0.4769, + "step": 11685 + }, + { + "epoch": 0.83, + "grad_norm": 0.6278306783613304, + "learning_rate": 7.451820978056007e-07, + "loss": 0.4323, + "step": 11686 + }, + { + "epoch": 0.83, + "grad_norm": 1.9784204780731713, + "learning_rate": 7.445786393133153e-07, + "loss": 0.5458, + "step": 11687 + }, + { + "epoch": 0.83, + "grad_norm": 1.7141498533420882, + "learning_rate": 7.439754056068054e-07, + "loss": 0.5525, + "step": 11688 + }, + { + "epoch": 0.83, + "grad_norm": 1.582235601773808, + "learning_rate": 7.433723967179363e-07, + "loss": 0.4731, + "step": 11689 + }, + { + "epoch": 0.83, + "grad_norm": 1.2928042213722004, + "learning_rate": 7.427696126785605e-07, + "loss": 0.4159, + "step": 11690 + }, + { + "epoch": 0.83, + "grad_norm": 1.8678097834504697, + "learning_rate": 7.421670535205206e-07, + "loss": 0.5181, + "step": 11691 + }, + { + "epoch": 0.83, + "grad_norm": 0.7249687656500796, + "learning_rate": 7.41564719275642e-07, + "loss": 0.4289, + "step": 11692 + }, + { + "epoch": 0.83, + "grad_norm": 1.748509234238804, + "learning_rate": 7.409626099757467e-07, + "loss": 0.5795, + "step": 11693 + }, + { + "epoch": 0.83, + "grad_norm": 1.7393461391644058, + "learning_rate": 7.403607256526363e-07, + "loss": 0.5287, + "step": 11694 + }, + { + "epoch": 0.83, + "grad_norm": 0.7861816181915153, + "learning_rate": 7.397590663381077e-07, + "loss": 0.4058, + "step": 11695 + }, + { + "epoch": 0.83, + "grad_norm": 1.7305382316717044, + "learning_rate": 7.391576320639388e-07, + "loss": 0.5865, + "step": 11696 + }, + { + "epoch": 0.83, + "grad_norm": 1.805045647217426, + "learning_rate": 7.385564228619029e-07, + "loss": 0.5594, + "step": 11697 + }, + { + "epoch": 0.83, + "grad_norm": 1.6263114811584123, + "learning_rate": 7.37955438763756e-07, + "loss": 0.5061, + "step": 11698 + }, + { + "epoch": 0.83, + "grad_norm": 1.8189516494093805, + "learning_rate": 7.373546798012448e-07, + "loss": 0.5522, + "step": 11699 + }, + { + "epoch": 0.83, + "grad_norm": 1.7546626891983064, + "learning_rate": 7.367541460061029e-07, + "loss": 0.5443, + "step": 11700 + }, + { + "epoch": 0.83, + "grad_norm": 1.648723629318573, + "learning_rate": 7.36153837410053e-07, + "loss": 0.529, + "step": 11701 + }, + { + "epoch": 0.83, + "grad_norm": 1.7434630171949477, + "learning_rate": 7.35553754044806e-07, + "loss": 0.4755, + "step": 11702 + }, + { + "epoch": 0.83, + "grad_norm": 1.6310135978226974, + "learning_rate": 7.349538959420577e-07, + "loss": 0.5216, + "step": 11703 + }, + { + "epoch": 0.83, + "grad_norm": 1.6369144801150737, + "learning_rate": 7.343542631334988e-07, + "loss": 0.5214, + "step": 11704 + }, + { + "epoch": 0.83, + "grad_norm": 1.9427740604092802, + "learning_rate": 7.337548556508006e-07, + "loss": 0.5765, + "step": 11705 + }, + { + "epoch": 0.83, + "grad_norm": 2.263104623753018, + "learning_rate": 7.331556735256268e-07, + "loss": 0.4668, + "step": 11706 + }, + { + "epoch": 0.83, + "grad_norm": 1.5766192705065845, + "learning_rate": 7.325567167896286e-07, + "loss": 0.5113, + "step": 11707 + }, + { + "epoch": 0.83, + "grad_norm": 1.6619438482655509, + "learning_rate": 7.319579854744446e-07, + "loss": 0.5174, + "step": 11708 + }, + { + "epoch": 0.83, + "grad_norm": 1.6401420964110671, + "learning_rate": 7.313594796117029e-07, + "loss": 0.5086, + "step": 11709 + }, + { + "epoch": 0.83, + "grad_norm": 1.5666994335476736, + "learning_rate": 7.307611992330166e-07, + "loss": 0.53, + "step": 11710 + }, + { + "epoch": 0.83, + "grad_norm": 2.1466156573936104, + "learning_rate": 7.301631443699896e-07, + "loss": 0.6009, + "step": 11711 + }, + { + "epoch": 0.83, + "grad_norm": 0.6810794632928244, + "learning_rate": 7.295653150542143e-07, + "loss": 0.396, + "step": 11712 + }, + { + "epoch": 0.83, + "grad_norm": 3.8436543582411202, + "learning_rate": 7.289677113172683e-07, + "loss": 0.5026, + "step": 11713 + }, + { + "epoch": 0.83, + "grad_norm": 2.0161864306361528, + "learning_rate": 7.283703331907216e-07, + "loss": 0.5017, + "step": 11714 + }, + { + "epoch": 0.83, + "grad_norm": 0.63188129273209, + "learning_rate": 7.277731807061261e-07, + "loss": 0.3829, + "step": 11715 + }, + { + "epoch": 0.83, + "grad_norm": 1.7288999046786706, + "learning_rate": 7.271762538950295e-07, + "loss": 0.5219, + "step": 11716 + }, + { + "epoch": 0.83, + "grad_norm": 1.7465654423386443, + "learning_rate": 7.265795527889602e-07, + "loss": 0.5216, + "step": 11717 + }, + { + "epoch": 0.83, + "grad_norm": 1.7742502570340188, + "learning_rate": 7.259830774194398e-07, + "loss": 0.5093, + "step": 11718 + }, + { + "epoch": 0.83, + "grad_norm": 0.7567646762093673, + "learning_rate": 7.253868278179749e-07, + "loss": 0.4389, + "step": 11719 + }, + { + "epoch": 0.83, + "grad_norm": 1.5092947792239753, + "learning_rate": 7.247908040160628e-07, + "loss": 0.4857, + "step": 11720 + }, + { + "epoch": 0.83, + "grad_norm": 1.7884496890540516, + "learning_rate": 7.241950060451875e-07, + "loss": 0.5178, + "step": 11721 + }, + { + "epoch": 0.83, + "grad_norm": 2.2801036048891934, + "learning_rate": 7.23599433936818e-07, + "loss": 0.6031, + "step": 11722 + }, + { + "epoch": 0.83, + "grad_norm": 2.0850986836143712, + "learning_rate": 7.230040877224193e-07, + "loss": 0.508, + "step": 11723 + }, + { + "epoch": 0.83, + "grad_norm": 1.6045627405522787, + "learning_rate": 7.224089674334362e-07, + "loss": 0.5198, + "step": 11724 + }, + { + "epoch": 0.83, + "grad_norm": 2.2923550449683745, + "learning_rate": 7.218140731013068e-07, + "loss": 0.5166, + "step": 11725 + }, + { + "epoch": 0.83, + "grad_norm": 1.4327126725620658, + "learning_rate": 7.212194047574522e-07, + "loss": 0.4933, + "step": 11726 + }, + { + "epoch": 0.83, + "grad_norm": 1.6047979555657583, + "learning_rate": 7.206249624332895e-07, + "loss": 0.4968, + "step": 11727 + }, + { + "epoch": 0.83, + "grad_norm": 1.9583177220634218, + "learning_rate": 7.200307461602158e-07, + "loss": 0.5262, + "step": 11728 + }, + { + "epoch": 0.83, + "grad_norm": 1.9930174277230923, + "learning_rate": 7.194367559696203e-07, + "loss": 0.4913, + "step": 11729 + }, + { + "epoch": 0.83, + "grad_norm": 1.8196360526875268, + "learning_rate": 7.188429918928802e-07, + "loss": 0.6062, + "step": 11730 + }, + { + "epoch": 0.83, + "grad_norm": 1.5371334674007648, + "learning_rate": 7.182494539613594e-07, + "loss": 0.5098, + "step": 11731 + }, + { + "epoch": 0.83, + "grad_norm": 1.6414438411408285, + "learning_rate": 7.176561422064127e-07, + "loss": 0.5391, + "step": 11732 + }, + { + "epoch": 0.83, + "grad_norm": 2.0270038142229927, + "learning_rate": 7.170630566593762e-07, + "loss": 0.598, + "step": 11733 + }, + { + "epoch": 0.83, + "grad_norm": 2.048513675185958, + "learning_rate": 7.164701973515841e-07, + "loss": 0.468, + "step": 11734 + }, + { + "epoch": 0.83, + "grad_norm": 1.887991490611884, + "learning_rate": 7.158775643143501e-07, + "loss": 0.5108, + "step": 11735 + }, + { + "epoch": 0.83, + "grad_norm": 1.6540049498822724, + "learning_rate": 7.152851575789793e-07, + "loss": 0.5183, + "step": 11736 + }, + { + "epoch": 0.83, + "grad_norm": 0.7176338322690999, + "learning_rate": 7.146929771767647e-07, + "loss": 0.4188, + "step": 11737 + }, + { + "epoch": 0.83, + "grad_norm": 1.6813935219342995, + "learning_rate": 7.141010231389883e-07, + "loss": 0.5576, + "step": 11738 + }, + { + "epoch": 0.83, + "grad_norm": 1.769808455240415, + "learning_rate": 7.135092954969192e-07, + "loss": 0.5327, + "step": 11739 + }, + { + "epoch": 0.83, + "grad_norm": 1.5875555614296115, + "learning_rate": 7.129177942818128e-07, + "loss": 0.5113, + "step": 11740 + }, + { + "epoch": 0.83, + "grad_norm": 1.6989571675350958, + "learning_rate": 7.123265195249152e-07, + "loss": 0.5461, + "step": 11741 + }, + { + "epoch": 0.83, + "grad_norm": 2.0254744464058625, + "learning_rate": 7.117354712574592e-07, + "loss": 0.5423, + "step": 11742 + }, + { + "epoch": 0.83, + "grad_norm": 5.411387811545026, + "learning_rate": 7.111446495106667e-07, + "loss": 0.4646, + "step": 11743 + }, + { + "epoch": 0.83, + "grad_norm": 1.5942877032876244, + "learning_rate": 7.105540543157474e-07, + "loss": 0.5043, + "step": 11744 + }, + { + "epoch": 0.83, + "grad_norm": 1.942628897669263, + "learning_rate": 7.099636857038949e-07, + "loss": 0.5779, + "step": 11745 + }, + { + "epoch": 0.83, + "grad_norm": 1.7365427836767406, + "learning_rate": 7.093735437062999e-07, + "loss": 0.5151, + "step": 11746 + }, + { + "epoch": 0.83, + "grad_norm": 1.8247248391417072, + "learning_rate": 7.087836283541317e-07, + "loss": 0.5827, + "step": 11747 + }, + { + "epoch": 0.83, + "grad_norm": 1.7062673841761788, + "learning_rate": 7.081939396785526e-07, + "loss": 0.4796, + "step": 11748 + }, + { + "epoch": 0.83, + "grad_norm": 1.5678610202714978, + "learning_rate": 7.076044777107122e-07, + "loss": 0.5373, + "step": 11749 + }, + { + "epoch": 0.83, + "grad_norm": 0.7739580235023502, + "learning_rate": 7.070152424817484e-07, + "loss": 0.4308, + "step": 11750 + }, + { + "epoch": 0.83, + "grad_norm": 1.625979876412168, + "learning_rate": 7.064262340227868e-07, + "loss": 0.538, + "step": 11751 + }, + { + "epoch": 0.83, + "grad_norm": 1.8792085023529423, + "learning_rate": 7.058374523649381e-07, + "loss": 0.544, + "step": 11752 + }, + { + "epoch": 0.83, + "grad_norm": 1.6038628638541297, + "learning_rate": 7.05248897539308e-07, + "loss": 0.4823, + "step": 11753 + }, + { + "epoch": 0.83, + "grad_norm": 1.9262273277634299, + "learning_rate": 7.046605695769825e-07, + "loss": 0.5916, + "step": 11754 + }, + { + "epoch": 0.83, + "grad_norm": 2.1283190651172297, + "learning_rate": 7.040724685090416e-07, + "loss": 0.5249, + "step": 11755 + }, + { + "epoch": 0.83, + "grad_norm": 0.7559554985882286, + "learning_rate": 7.034845943665469e-07, + "loss": 0.4194, + "step": 11756 + }, + { + "epoch": 0.83, + "grad_norm": 1.892010964689599, + "learning_rate": 7.028969471805563e-07, + "loss": 0.5329, + "step": 11757 + }, + { + "epoch": 0.83, + "grad_norm": 5.097428510997845, + "learning_rate": 7.023095269821101e-07, + "loss": 0.4895, + "step": 11758 + }, + { + "epoch": 0.83, + "grad_norm": 4.320470950834997, + "learning_rate": 7.017223338022361e-07, + "loss": 0.5046, + "step": 11759 + }, + { + "epoch": 0.83, + "grad_norm": 1.9057888539607624, + "learning_rate": 7.011353676719535e-07, + "loss": 0.5013, + "step": 11760 + }, + { + "epoch": 0.83, + "grad_norm": 1.831383180857901, + "learning_rate": 7.005486286222668e-07, + "loss": 0.5137, + "step": 11761 + }, + { + "epoch": 0.83, + "grad_norm": 1.6757386208362623, + "learning_rate": 6.999621166841714e-07, + "loss": 0.5172, + "step": 11762 + }, + { + "epoch": 0.83, + "grad_norm": 1.7101491615035325, + "learning_rate": 6.993758318886452e-07, + "loss": 0.5197, + "step": 11763 + }, + { + "epoch": 0.83, + "grad_norm": 2.106896453289844, + "learning_rate": 6.987897742666621e-07, + "loss": 0.5175, + "step": 11764 + }, + { + "epoch": 0.83, + "grad_norm": 1.8713826789143162, + "learning_rate": 6.982039438491766e-07, + "loss": 0.536, + "step": 11765 + }, + { + "epoch": 0.83, + "grad_norm": 1.401630796686222, + "learning_rate": 6.976183406671355e-07, + "loss": 0.4492, + "step": 11766 + }, + { + "epoch": 0.84, + "grad_norm": 2.1680493064548454, + "learning_rate": 6.970329647514723e-07, + "loss": 0.5305, + "step": 11767 + }, + { + "epoch": 0.84, + "grad_norm": 1.6551899334210425, + "learning_rate": 6.964478161331079e-07, + "loss": 0.4877, + "step": 11768 + }, + { + "epoch": 0.84, + "grad_norm": 2.6988076549411204, + "learning_rate": 6.958628948429536e-07, + "loss": 0.5393, + "step": 11769 + }, + { + "epoch": 0.84, + "grad_norm": 2.095467595918868, + "learning_rate": 6.952782009119041e-07, + "loss": 0.5247, + "step": 11770 + }, + { + "epoch": 0.84, + "grad_norm": 1.5297646366140598, + "learning_rate": 6.94693734370847e-07, + "loss": 0.4644, + "step": 11771 + }, + { + "epoch": 0.84, + "grad_norm": 1.819367966899226, + "learning_rate": 6.941094952506549e-07, + "loss": 0.5606, + "step": 11772 + }, + { + "epoch": 0.84, + "grad_norm": 1.7373888065032759, + "learning_rate": 6.935254835821897e-07, + "loss": 0.4914, + "step": 11773 + }, + { + "epoch": 0.84, + "grad_norm": 1.763793275290126, + "learning_rate": 6.929416993963018e-07, + "loss": 0.4764, + "step": 11774 + }, + { + "epoch": 0.84, + "grad_norm": 1.7552752126809743, + "learning_rate": 6.923581427238258e-07, + "loss": 0.5493, + "step": 11775 + }, + { + "epoch": 0.84, + "grad_norm": 1.4279978045574078, + "learning_rate": 6.917748135955904e-07, + "loss": 0.5151, + "step": 11776 + }, + { + "epoch": 0.84, + "grad_norm": 1.6198639900622387, + "learning_rate": 6.911917120424072e-07, + "loss": 0.5165, + "step": 11777 + }, + { + "epoch": 0.84, + "grad_norm": 3.8366133144857177, + "learning_rate": 6.906088380950776e-07, + "loss": 0.477, + "step": 11778 + }, + { + "epoch": 0.84, + "grad_norm": 1.7082065925953187, + "learning_rate": 6.900261917843915e-07, + "loss": 0.5018, + "step": 11779 + }, + { + "epoch": 0.84, + "grad_norm": 1.943017859534004, + "learning_rate": 6.894437731411263e-07, + "loss": 0.5038, + "step": 11780 + }, + { + "epoch": 0.84, + "grad_norm": 2.475951821202235, + "learning_rate": 6.888615821960482e-07, + "loss": 0.5211, + "step": 11781 + }, + { + "epoch": 0.84, + "grad_norm": 1.7970312721669137, + "learning_rate": 6.882796189799073e-07, + "loss": 0.5562, + "step": 11782 + }, + { + "epoch": 0.84, + "grad_norm": 1.5486480404484708, + "learning_rate": 6.876978835234494e-07, + "loss": 0.4806, + "step": 11783 + }, + { + "epoch": 0.84, + "grad_norm": 1.9950435751345448, + "learning_rate": 6.871163758573995e-07, + "loss": 0.4857, + "step": 11784 + }, + { + "epoch": 0.84, + "grad_norm": 0.7082899741949696, + "learning_rate": 6.865350960124773e-07, + "loss": 0.4111, + "step": 11785 + }, + { + "epoch": 0.84, + "grad_norm": 1.6036864199778271, + "learning_rate": 6.859540440193873e-07, + "loss": 0.4915, + "step": 11786 + }, + { + "epoch": 0.84, + "grad_norm": 1.6786206978896119, + "learning_rate": 6.853732199088226e-07, + "loss": 0.541, + "step": 11787 + }, + { + "epoch": 0.84, + "grad_norm": 1.4242563925928984, + "learning_rate": 6.847926237114654e-07, + "loss": 0.5074, + "step": 11788 + }, + { + "epoch": 0.84, + "grad_norm": 1.8207592999978541, + "learning_rate": 6.842122554579827e-07, + "loss": 0.4054, + "step": 11789 + }, + { + "epoch": 0.84, + "grad_norm": 2.3605272984539556, + "learning_rate": 6.836321151790326e-07, + "loss": 0.5551, + "step": 11790 + }, + { + "epoch": 0.84, + "grad_norm": 0.7493659316116227, + "learning_rate": 6.8305220290526e-07, + "loss": 0.4089, + "step": 11791 + }, + { + "epoch": 0.84, + "grad_norm": 0.6533537816039451, + "learning_rate": 6.824725186672992e-07, + "loss": 0.4014, + "step": 11792 + }, + { + "epoch": 0.84, + "grad_norm": 1.769666273987326, + "learning_rate": 6.818930624957682e-07, + "loss": 0.532, + "step": 11793 + }, + { + "epoch": 0.84, + "grad_norm": 2.063951855360864, + "learning_rate": 6.813138344212766e-07, + "loss": 0.5001, + "step": 11794 + }, + { + "epoch": 0.84, + "grad_norm": 2.720824439969221, + "learning_rate": 6.807348344744242e-07, + "loss": 0.5943, + "step": 11795 + }, + { + "epoch": 0.84, + "grad_norm": 1.531909646954632, + "learning_rate": 6.801560626857922e-07, + "loss": 0.4388, + "step": 11796 + }, + { + "epoch": 0.84, + "grad_norm": 1.544663812329272, + "learning_rate": 6.795775190859544e-07, + "loss": 0.5118, + "step": 11797 + }, + { + "epoch": 0.84, + "grad_norm": 1.7999083217658174, + "learning_rate": 6.789992037054721e-07, + "loss": 0.5675, + "step": 11798 + }, + { + "epoch": 0.84, + "grad_norm": 1.5653751865554992, + "learning_rate": 6.784211165748944e-07, + "loss": 0.4742, + "step": 11799 + }, + { + "epoch": 0.84, + "grad_norm": 1.6853735124624345, + "learning_rate": 6.778432577247557e-07, + "loss": 0.547, + "step": 11800 + }, + { + "epoch": 0.84, + "grad_norm": 1.8679263583925094, + "learning_rate": 6.772656271855815e-07, + "loss": 0.483, + "step": 11801 + }, + { + "epoch": 0.84, + "grad_norm": 0.7413442933179937, + "learning_rate": 6.766882249878842e-07, + "loss": 0.4582, + "step": 11802 + }, + { + "epoch": 0.84, + "grad_norm": 1.5829422641563322, + "learning_rate": 6.761110511621644e-07, + "loss": 0.5382, + "step": 11803 + }, + { + "epoch": 0.84, + "grad_norm": 1.7411922036799057, + "learning_rate": 6.755341057389109e-07, + "loss": 0.5112, + "step": 11804 + }, + { + "epoch": 0.84, + "grad_norm": 1.8442264120935636, + "learning_rate": 6.749573887485978e-07, + "loss": 0.5143, + "step": 11805 + }, + { + "epoch": 0.84, + "grad_norm": 1.5937919382873207, + "learning_rate": 6.743809002216923e-07, + "loss": 0.5729, + "step": 11806 + }, + { + "epoch": 0.84, + "grad_norm": 1.9332930127727608, + "learning_rate": 6.738046401886439e-07, + "loss": 0.5167, + "step": 11807 + }, + { + "epoch": 0.84, + "grad_norm": 2.0476774449399744, + "learning_rate": 6.732286086798934e-07, + "loss": 0.5583, + "step": 11808 + }, + { + "epoch": 0.84, + "grad_norm": 4.657767145257553, + "learning_rate": 6.726528057258691e-07, + "loss": 0.5648, + "step": 11809 + }, + { + "epoch": 0.84, + "grad_norm": 1.4810884138377158, + "learning_rate": 6.720772313569868e-07, + "loss": 0.4509, + "step": 11810 + }, + { + "epoch": 0.84, + "grad_norm": 1.8765133058832177, + "learning_rate": 6.71501885603651e-07, + "loss": 0.4816, + "step": 11811 + }, + { + "epoch": 0.84, + "grad_norm": 1.8296442511351512, + "learning_rate": 6.709267684962506e-07, + "loss": 0.5304, + "step": 11812 + }, + { + "epoch": 0.84, + "grad_norm": 1.7863413591773318, + "learning_rate": 6.703518800651692e-07, + "loss": 0.5904, + "step": 11813 + }, + { + "epoch": 0.84, + "grad_norm": 1.7746467775639743, + "learning_rate": 6.69777220340771e-07, + "loss": 0.5076, + "step": 11814 + }, + { + "epoch": 0.84, + "grad_norm": 2.536070109182169, + "learning_rate": 6.692027893534131e-07, + "loss": 0.5117, + "step": 11815 + }, + { + "epoch": 0.84, + "grad_norm": 1.7995584359711236, + "learning_rate": 6.686285871334386e-07, + "loss": 0.5692, + "step": 11816 + }, + { + "epoch": 0.84, + "grad_norm": 0.68664583102823, + "learning_rate": 6.680546137111787e-07, + "loss": 0.4137, + "step": 11817 + }, + { + "epoch": 0.84, + "grad_norm": 1.6183396077534355, + "learning_rate": 6.674808691169543e-07, + "loss": 0.5601, + "step": 11818 + }, + { + "epoch": 0.84, + "grad_norm": 1.9088050736116609, + "learning_rate": 6.669073533810693e-07, + "loss": 0.5101, + "step": 11819 + }, + { + "epoch": 0.84, + "grad_norm": 1.6324730051985417, + "learning_rate": 6.663340665338208e-07, + "loss": 0.4597, + "step": 11820 + }, + { + "epoch": 0.84, + "grad_norm": 1.8164582767815105, + "learning_rate": 6.657610086054911e-07, + "loss": 0.5746, + "step": 11821 + }, + { + "epoch": 0.84, + "grad_norm": 0.6580095526866813, + "learning_rate": 6.651881796263515e-07, + "loss": 0.3958, + "step": 11822 + }, + { + "epoch": 0.84, + "grad_norm": 1.7101971635891393, + "learning_rate": 6.646155796266618e-07, + "loss": 0.5541, + "step": 11823 + }, + { + "epoch": 0.84, + "grad_norm": 0.730223468332458, + "learning_rate": 6.64043208636665e-07, + "loss": 0.4331, + "step": 11824 + }, + { + "epoch": 0.84, + "grad_norm": 1.5920837223212028, + "learning_rate": 6.634710666866001e-07, + "loss": 0.5502, + "step": 11825 + }, + { + "epoch": 0.84, + "grad_norm": 2.0847140154454378, + "learning_rate": 6.62899153806687e-07, + "loss": 0.4955, + "step": 11826 + }, + { + "epoch": 0.84, + "grad_norm": 1.8709589804553248, + "learning_rate": 6.623274700271376e-07, + "loss": 0.4782, + "step": 11827 + }, + { + "epoch": 0.84, + "grad_norm": 1.5351706184914884, + "learning_rate": 6.617560153781471e-07, + "loss": 0.5164, + "step": 11828 + }, + { + "epoch": 0.84, + "grad_norm": 1.8726129105132676, + "learning_rate": 6.611847898899059e-07, + "loss": 0.5047, + "step": 11829 + }, + { + "epoch": 0.84, + "grad_norm": 2.8529257548143003, + "learning_rate": 6.606137935925854e-07, + "loss": 0.5403, + "step": 11830 + }, + { + "epoch": 0.84, + "grad_norm": 1.7395416207803651, + "learning_rate": 6.600430265163476e-07, + "loss": 0.505, + "step": 11831 + }, + { + "epoch": 0.84, + "grad_norm": 1.7575780476355187, + "learning_rate": 6.594724886913434e-07, + "loss": 0.5632, + "step": 11832 + }, + { + "epoch": 0.84, + "grad_norm": 1.6329065116169648, + "learning_rate": 6.589021801477097e-07, + "loss": 0.5447, + "step": 11833 + }, + { + "epoch": 0.84, + "grad_norm": 1.7818750994843486, + "learning_rate": 6.583321009155741e-07, + "loss": 0.5088, + "step": 11834 + }, + { + "epoch": 0.84, + "grad_norm": 1.6584674767098533, + "learning_rate": 6.577622510250459e-07, + "loss": 0.5509, + "step": 11835 + }, + { + "epoch": 0.84, + "grad_norm": 1.604826325853938, + "learning_rate": 6.571926305062314e-07, + "loss": 0.5138, + "step": 11836 + }, + { + "epoch": 0.84, + "grad_norm": 1.5207163780762065, + "learning_rate": 6.566232393892163e-07, + "loss": 0.481, + "step": 11837 + }, + { + "epoch": 0.84, + "grad_norm": 0.6034200038016776, + "learning_rate": 6.560540777040791e-07, + "loss": 0.4081, + "step": 11838 + }, + { + "epoch": 0.84, + "grad_norm": 1.9661035325886336, + "learning_rate": 6.554851454808853e-07, + "loss": 0.505, + "step": 11839 + }, + { + "epoch": 0.84, + "grad_norm": 1.598767838178624, + "learning_rate": 6.549164427496868e-07, + "loss": 0.452, + "step": 11840 + }, + { + "epoch": 0.84, + "grad_norm": 1.6332809514699482, + "learning_rate": 6.543479695405264e-07, + "loss": 0.512, + "step": 11841 + }, + { + "epoch": 0.84, + "grad_norm": 1.8424738837900463, + "learning_rate": 6.5377972588343e-07, + "loss": 0.5637, + "step": 11842 + }, + { + "epoch": 0.84, + "grad_norm": 2.1820938830766266, + "learning_rate": 6.532117118084152e-07, + "loss": 0.4679, + "step": 11843 + }, + { + "epoch": 0.84, + "grad_norm": 2.0999802046232223, + "learning_rate": 6.526439273454871e-07, + "loss": 0.4478, + "step": 11844 + }, + { + "epoch": 0.84, + "grad_norm": 2.3500281042325892, + "learning_rate": 6.520763725246376e-07, + "loss": 0.5401, + "step": 11845 + }, + { + "epoch": 0.84, + "grad_norm": 0.6966234289956624, + "learning_rate": 6.515090473758468e-07, + "loss": 0.4264, + "step": 11846 + }, + { + "epoch": 0.84, + "grad_norm": 2.1480606844986663, + "learning_rate": 6.509419519290832e-07, + "loss": 0.5295, + "step": 11847 + }, + { + "epoch": 0.84, + "grad_norm": 1.4980252847211433, + "learning_rate": 6.503750862143027e-07, + "loss": 0.5197, + "step": 11848 + }, + { + "epoch": 0.84, + "grad_norm": 1.8123147851797627, + "learning_rate": 6.498084502614482e-07, + "loss": 0.4911, + "step": 11849 + }, + { + "epoch": 0.84, + "grad_norm": 1.5384462825794913, + "learning_rate": 6.492420441004516e-07, + "loss": 0.4769, + "step": 11850 + }, + { + "epoch": 0.84, + "grad_norm": 1.6117337797167215, + "learning_rate": 6.486758677612326e-07, + "loss": 0.4278, + "step": 11851 + }, + { + "epoch": 0.84, + "grad_norm": 1.6767912263132492, + "learning_rate": 6.481099212736986e-07, + "loss": 0.4665, + "step": 11852 + }, + { + "epoch": 0.84, + "grad_norm": 1.6964713794386312, + "learning_rate": 6.475442046677455e-07, + "loss": 0.5916, + "step": 11853 + }, + { + "epoch": 0.84, + "grad_norm": 0.7185875338736285, + "learning_rate": 6.46978717973254e-07, + "loss": 0.4387, + "step": 11854 + }, + { + "epoch": 0.84, + "grad_norm": 1.4792906721573698, + "learning_rate": 6.464134612200984e-07, + "loss": 0.4902, + "step": 11855 + }, + { + "epoch": 0.84, + "grad_norm": 1.8401960606002405, + "learning_rate": 6.45848434438135e-07, + "loss": 0.5083, + "step": 11856 + }, + { + "epoch": 0.84, + "grad_norm": 1.786115361437105, + "learning_rate": 6.452836376572125e-07, + "loss": 0.5482, + "step": 11857 + }, + { + "epoch": 0.84, + "grad_norm": 2.8057932812776394, + "learning_rate": 6.44719070907161e-07, + "loss": 0.5535, + "step": 11858 + }, + { + "epoch": 0.84, + "grad_norm": 1.5058202220882033, + "learning_rate": 6.441547342178089e-07, + "loss": 0.5292, + "step": 11859 + }, + { + "epoch": 0.84, + "grad_norm": 1.5140274210519358, + "learning_rate": 6.435906276189619e-07, + "loss": 0.4174, + "step": 11860 + }, + { + "epoch": 0.84, + "grad_norm": 1.9255255357109917, + "learning_rate": 6.430267511404187e-07, + "loss": 0.4611, + "step": 11861 + }, + { + "epoch": 0.84, + "grad_norm": 1.6891356915749074, + "learning_rate": 6.424631048119679e-07, + "loss": 0.4916, + "step": 11862 + }, + { + "epoch": 0.84, + "grad_norm": 1.7677709698256114, + "learning_rate": 6.418996886633799e-07, + "loss": 0.5147, + "step": 11863 + }, + { + "epoch": 0.84, + "grad_norm": 1.5412107486755593, + "learning_rate": 6.41336502724419e-07, + "loss": 0.4995, + "step": 11864 + }, + { + "epoch": 0.84, + "grad_norm": 2.722550127633212, + "learning_rate": 6.407735470248305e-07, + "loss": 0.5626, + "step": 11865 + }, + { + "epoch": 0.84, + "grad_norm": 1.857630425647706, + "learning_rate": 6.40210821594357e-07, + "loss": 0.5029, + "step": 11866 + }, + { + "epoch": 0.84, + "grad_norm": 1.758628626374555, + "learning_rate": 6.396483264627202e-07, + "loss": 0.527, + "step": 11867 + }, + { + "epoch": 0.84, + "grad_norm": 1.8544266035428565, + "learning_rate": 6.390860616596328e-07, + "loss": 0.4962, + "step": 11868 + }, + { + "epoch": 0.84, + "grad_norm": 1.5501200208790278, + "learning_rate": 6.385240272147969e-07, + "loss": 0.5528, + "step": 11869 + }, + { + "epoch": 0.84, + "grad_norm": 5.362444374529221, + "learning_rate": 6.379622231579008e-07, + "loss": 0.5197, + "step": 11870 + }, + { + "epoch": 0.84, + "grad_norm": 2.0678746217594144, + "learning_rate": 6.374006495186214e-07, + "loss": 0.5624, + "step": 11871 + }, + { + "epoch": 0.84, + "grad_norm": 2.0727872957261932, + "learning_rate": 6.368393063266209e-07, + "loss": 0.5548, + "step": 11872 + }, + { + "epoch": 0.84, + "grad_norm": 0.6897554930141901, + "learning_rate": 6.362781936115531e-07, + "loss": 0.4188, + "step": 11873 + }, + { + "epoch": 0.84, + "grad_norm": 1.6950268731589517, + "learning_rate": 6.357173114030574e-07, + "loss": 0.508, + "step": 11874 + }, + { + "epoch": 0.84, + "grad_norm": 1.4575225304125996, + "learning_rate": 6.351566597307612e-07, + "loss": 0.5274, + "step": 11875 + }, + { + "epoch": 0.84, + "grad_norm": 1.570234943046175, + "learning_rate": 6.345962386242805e-07, + "loss": 0.5218, + "step": 11876 + }, + { + "epoch": 0.84, + "grad_norm": 2.1954854015289182, + "learning_rate": 6.340360481132185e-07, + "loss": 0.5483, + "step": 11877 + }, + { + "epoch": 0.84, + "grad_norm": 1.668535474698413, + "learning_rate": 6.334760882271673e-07, + "loss": 0.5079, + "step": 11878 + }, + { + "epoch": 0.84, + "grad_norm": 1.7881519330911249, + "learning_rate": 6.329163589957038e-07, + "loss": 0.5385, + "step": 11879 + }, + { + "epoch": 0.84, + "grad_norm": 2.69226515510854, + "learning_rate": 6.323568604483966e-07, + "loss": 0.4764, + "step": 11880 + }, + { + "epoch": 0.84, + "grad_norm": 2.866313735154939, + "learning_rate": 6.31797592614799e-07, + "loss": 0.51, + "step": 11881 + }, + { + "epoch": 0.84, + "grad_norm": 2.6848172023794215, + "learning_rate": 6.312385555244538e-07, + "loss": 0.5167, + "step": 11882 + }, + { + "epoch": 0.84, + "grad_norm": 1.6434595154611973, + "learning_rate": 6.306797492068934e-07, + "loss": 0.4714, + "step": 11883 + }, + { + "epoch": 0.84, + "grad_norm": 1.74888765048661, + "learning_rate": 6.301211736916307e-07, + "loss": 0.4888, + "step": 11884 + }, + { + "epoch": 0.84, + "grad_norm": 1.7380916045639057, + "learning_rate": 6.295628290081779e-07, + "loss": 0.534, + "step": 11885 + }, + { + "epoch": 0.84, + "grad_norm": 0.6536593219521794, + "learning_rate": 6.29004715186024e-07, + "loss": 0.3836, + "step": 11886 + }, + { + "epoch": 0.84, + "grad_norm": 1.6035103266869262, + "learning_rate": 6.284468322546533e-07, + "loss": 0.4853, + "step": 11887 + }, + { + "epoch": 0.84, + "grad_norm": 1.604382490559528, + "learning_rate": 6.278891802435311e-07, + "loss": 0.5318, + "step": 11888 + }, + { + "epoch": 0.84, + "grad_norm": 1.6459072424118653, + "learning_rate": 6.273317591821188e-07, + "loss": 0.4513, + "step": 11889 + }, + { + "epoch": 0.84, + "grad_norm": 1.5905258953822918, + "learning_rate": 6.267745690998606e-07, + "loss": 0.5586, + "step": 11890 + }, + { + "epoch": 0.84, + "grad_norm": 1.6832761236489961, + "learning_rate": 6.262176100261868e-07, + "loss": 0.5091, + "step": 11891 + }, + { + "epoch": 0.84, + "grad_norm": 1.4947962450590664, + "learning_rate": 6.256608819905191e-07, + "loss": 0.5339, + "step": 11892 + }, + { + "epoch": 0.84, + "grad_norm": 1.7357471644104834, + "learning_rate": 6.251043850222666e-07, + "loss": 0.4761, + "step": 11893 + }, + { + "epoch": 0.84, + "grad_norm": 2.0866853312912004, + "learning_rate": 6.245481191508251e-07, + "loss": 0.5048, + "step": 11894 + }, + { + "epoch": 0.84, + "grad_norm": 1.709380097923953, + "learning_rate": 6.239920844055757e-07, + "loss": 0.5699, + "step": 11895 + }, + { + "epoch": 0.84, + "grad_norm": 1.6665780839320476, + "learning_rate": 6.23436280815895e-07, + "loss": 0.5438, + "step": 11896 + }, + { + "epoch": 0.84, + "grad_norm": 1.6160611268580256, + "learning_rate": 6.228807084111388e-07, + "loss": 0.4958, + "step": 11897 + }, + { + "epoch": 0.84, + "grad_norm": 1.8223511085625619, + "learning_rate": 6.223253672206547e-07, + "loss": 0.5339, + "step": 11898 + }, + { + "epoch": 0.84, + "grad_norm": 1.5225139495983389, + "learning_rate": 6.217702572737789e-07, + "loss": 0.477, + "step": 11899 + }, + { + "epoch": 0.84, + "grad_norm": 1.734033256675826, + "learning_rate": 6.212153785998331e-07, + "loss": 0.586, + "step": 11900 + }, + { + "epoch": 0.84, + "grad_norm": 1.9344618827641535, + "learning_rate": 6.206607312281299e-07, + "loss": 0.5075, + "step": 11901 + }, + { + "epoch": 0.84, + "grad_norm": 1.8026786266052433, + "learning_rate": 6.201063151879644e-07, + "loss": 0.5429, + "step": 11902 + }, + { + "epoch": 0.84, + "grad_norm": 1.5226500493783643, + "learning_rate": 6.195521305086249e-07, + "loss": 0.5268, + "step": 11903 + }, + { + "epoch": 0.84, + "grad_norm": 1.854557169301373, + "learning_rate": 6.189981772193848e-07, + "loss": 0.5006, + "step": 11904 + }, + { + "epoch": 0.84, + "grad_norm": 1.8874139656557904, + "learning_rate": 6.184444553495056e-07, + "loss": 0.5288, + "step": 11905 + }, + { + "epoch": 0.84, + "grad_norm": 2.236671700749139, + "learning_rate": 6.178909649282383e-07, + "loss": 0.4426, + "step": 11906 + }, + { + "epoch": 0.84, + "grad_norm": 1.575524762018905, + "learning_rate": 6.173377059848168e-07, + "loss": 0.4998, + "step": 11907 + }, + { + "epoch": 0.85, + "grad_norm": 1.627295515810354, + "learning_rate": 6.167846785484699e-07, + "loss": 0.5116, + "step": 11908 + }, + { + "epoch": 0.85, + "grad_norm": 1.9582523217877705, + "learning_rate": 6.162318826484081e-07, + "loss": 0.6021, + "step": 11909 + }, + { + "epoch": 0.85, + "grad_norm": 2.263550606160613, + "learning_rate": 6.156793183138321e-07, + "loss": 0.4504, + "step": 11910 + }, + { + "epoch": 0.85, + "grad_norm": 1.8356778064239916, + "learning_rate": 6.151269855739306e-07, + "loss": 0.5238, + "step": 11911 + }, + { + "epoch": 0.85, + "grad_norm": 1.6623013888868055, + "learning_rate": 6.145748844578797e-07, + "loss": 0.495, + "step": 11912 + }, + { + "epoch": 0.85, + "grad_norm": 1.7036969898522185, + "learning_rate": 6.140230149948446e-07, + "loss": 0.5298, + "step": 11913 + }, + { + "epoch": 0.85, + "grad_norm": 1.8218746057944, + "learning_rate": 6.134713772139733e-07, + "loss": 0.5096, + "step": 11914 + }, + { + "epoch": 0.85, + "grad_norm": 1.7135750983967926, + "learning_rate": 6.129199711444095e-07, + "loss": 0.5314, + "step": 11915 + }, + { + "epoch": 0.85, + "grad_norm": 0.6584490172985701, + "learning_rate": 6.123687968152775e-07, + "loss": 0.409, + "step": 11916 + }, + { + "epoch": 0.85, + "grad_norm": 1.7154526498373748, + "learning_rate": 6.118178542556924e-07, + "loss": 0.5073, + "step": 11917 + }, + { + "epoch": 0.85, + "grad_norm": 0.6440051895600025, + "learning_rate": 6.112671434947581e-07, + "loss": 0.4247, + "step": 11918 + }, + { + "epoch": 0.85, + "grad_norm": 1.709970979397856, + "learning_rate": 6.107166645615636e-07, + "loss": 0.5508, + "step": 11919 + }, + { + "epoch": 0.85, + "grad_norm": 1.6024044902431125, + "learning_rate": 6.101664174851895e-07, + "loss": 0.5251, + "step": 11920 + }, + { + "epoch": 0.85, + "grad_norm": 2.2482844914840583, + "learning_rate": 6.096164022946988e-07, + "loss": 0.4883, + "step": 11921 + }, + { + "epoch": 0.85, + "grad_norm": 1.7715679695075872, + "learning_rate": 6.090666190191463e-07, + "loss": 0.5123, + "step": 11922 + }, + { + "epoch": 0.85, + "grad_norm": 1.7937842322718651, + "learning_rate": 6.085170676875734e-07, + "loss": 0.5812, + "step": 11923 + }, + { + "epoch": 0.85, + "grad_norm": 2.247826295659756, + "learning_rate": 6.079677483290103e-07, + "loss": 0.5691, + "step": 11924 + }, + { + "epoch": 0.85, + "grad_norm": 1.8899982859448592, + "learning_rate": 6.074186609724714e-07, + "loss": 0.5168, + "step": 11925 + }, + { + "epoch": 0.85, + "grad_norm": 1.7898727534449974, + "learning_rate": 6.068698056469636e-07, + "loss": 0.5182, + "step": 11926 + }, + { + "epoch": 0.85, + "grad_norm": 2.253929494355518, + "learning_rate": 6.063211823814796e-07, + "loss": 0.4655, + "step": 11927 + }, + { + "epoch": 0.85, + "grad_norm": 0.7134080675395075, + "learning_rate": 6.057727912049976e-07, + "loss": 0.4271, + "step": 11928 + }, + { + "epoch": 0.85, + "grad_norm": 1.6857481343498593, + "learning_rate": 6.052246321464867e-07, + "loss": 0.5506, + "step": 11929 + }, + { + "epoch": 0.85, + "grad_norm": 2.297299773356219, + "learning_rate": 6.046767052349017e-07, + "loss": 0.5586, + "step": 11930 + }, + { + "epoch": 0.85, + "grad_norm": 2.2570031003038404, + "learning_rate": 6.041290104991881e-07, + "loss": 0.5294, + "step": 11931 + }, + { + "epoch": 0.85, + "grad_norm": 1.5042563658450654, + "learning_rate": 6.035815479682744e-07, + "loss": 0.4538, + "step": 11932 + }, + { + "epoch": 0.85, + "grad_norm": 1.7206443971645515, + "learning_rate": 6.030343176710801e-07, + "loss": 0.4968, + "step": 11933 + }, + { + "epoch": 0.85, + "grad_norm": 1.7074437828615119, + "learning_rate": 6.024873196365122e-07, + "loss": 0.4911, + "step": 11934 + }, + { + "epoch": 0.85, + "grad_norm": 1.7055227661598962, + "learning_rate": 6.019405538934647e-07, + "loss": 0.5449, + "step": 11935 + }, + { + "epoch": 0.85, + "grad_norm": 1.5237698533417183, + "learning_rate": 6.013940204708213e-07, + "loss": 0.4931, + "step": 11936 + }, + { + "epoch": 0.85, + "grad_norm": 1.7399586950096302, + "learning_rate": 6.008477193974477e-07, + "loss": 0.5654, + "step": 11937 + }, + { + "epoch": 0.85, + "grad_norm": 0.6858261680749975, + "learning_rate": 6.003016507022064e-07, + "loss": 0.4318, + "step": 11938 + }, + { + "epoch": 0.85, + "grad_norm": 1.7514194207823826, + "learning_rate": 5.997558144139387e-07, + "loss": 0.5156, + "step": 11939 + }, + { + "epoch": 0.85, + "grad_norm": 1.683721248888722, + "learning_rate": 5.992102105614794e-07, + "loss": 0.449, + "step": 11940 + }, + { + "epoch": 0.85, + "grad_norm": 2.0868539272656466, + "learning_rate": 5.986648391736482e-07, + "loss": 0.4817, + "step": 11941 + }, + { + "epoch": 0.85, + "grad_norm": 2.0683465755366797, + "learning_rate": 5.981197002792544e-07, + "loss": 0.5539, + "step": 11942 + }, + { + "epoch": 0.85, + "grad_norm": 1.6826689293171189, + "learning_rate": 5.975747939070942e-07, + "loss": 0.5521, + "step": 11943 + }, + { + "epoch": 0.85, + "grad_norm": 1.6440222620061942, + "learning_rate": 5.97030120085949e-07, + "loss": 0.5017, + "step": 11944 + }, + { + "epoch": 0.85, + "grad_norm": 1.6198271648457239, + "learning_rate": 5.964856788445944e-07, + "loss": 0.4461, + "step": 11945 + }, + { + "epoch": 0.85, + "grad_norm": 1.4337266230037986, + "learning_rate": 5.95941470211786e-07, + "loss": 0.4649, + "step": 11946 + }, + { + "epoch": 0.85, + "grad_norm": 0.7296666224437548, + "learning_rate": 5.953974942162721e-07, + "loss": 0.4145, + "step": 11947 + }, + { + "epoch": 0.85, + "grad_norm": 1.7718148746362548, + "learning_rate": 5.94853750886788e-07, + "loss": 0.5065, + "step": 11948 + }, + { + "epoch": 0.85, + "grad_norm": 1.6373547570398883, + "learning_rate": 5.94310240252055e-07, + "loss": 0.503, + "step": 11949 + }, + { + "epoch": 0.85, + "grad_norm": 2.7284074339036697, + "learning_rate": 5.937669623407849e-07, + "loss": 0.5455, + "step": 11950 + }, + { + "epoch": 0.85, + "grad_norm": 1.6260004312325043, + "learning_rate": 5.932239171816728e-07, + "loss": 0.4523, + "step": 11951 + }, + { + "epoch": 0.85, + "grad_norm": 3.713113714208407, + "learning_rate": 5.926811048034059e-07, + "loss": 0.4417, + "step": 11952 + }, + { + "epoch": 0.85, + "grad_norm": 1.6179873437298362, + "learning_rate": 5.921385252346567e-07, + "loss": 0.5384, + "step": 11953 + }, + { + "epoch": 0.85, + "grad_norm": 1.7066919621697534, + "learning_rate": 5.915961785040869e-07, + "loss": 0.5036, + "step": 11954 + }, + { + "epoch": 0.85, + "grad_norm": 0.6712154905853225, + "learning_rate": 5.910540646403456e-07, + "loss": 0.4154, + "step": 11955 + }, + { + "epoch": 0.85, + "grad_norm": 1.765034413512885, + "learning_rate": 5.905121836720656e-07, + "loss": 0.4636, + "step": 11956 + }, + { + "epoch": 0.85, + "grad_norm": 1.628945460935493, + "learning_rate": 5.899705356278762e-07, + "loss": 0.5094, + "step": 11957 + }, + { + "epoch": 0.85, + "grad_norm": 1.7282870711030984, + "learning_rate": 5.89429120536385e-07, + "loss": 0.5906, + "step": 11958 + }, + { + "epoch": 0.85, + "grad_norm": 1.9878310463226856, + "learning_rate": 5.888879384261925e-07, + "loss": 0.4686, + "step": 11959 + }, + { + "epoch": 0.85, + "grad_norm": 1.729480354191174, + "learning_rate": 5.883469893258864e-07, + "loss": 0.4723, + "step": 11960 + }, + { + "epoch": 0.85, + "grad_norm": 3.187639425292521, + "learning_rate": 5.878062732640422e-07, + "loss": 0.4884, + "step": 11961 + }, + { + "epoch": 0.85, + "grad_norm": 1.6403075558053284, + "learning_rate": 5.872657902692198e-07, + "loss": 0.5035, + "step": 11962 + }, + { + "epoch": 0.85, + "grad_norm": 1.7411309215424438, + "learning_rate": 5.867255403699707e-07, + "loss": 0.5306, + "step": 11963 + }, + { + "epoch": 0.85, + "grad_norm": 1.903703156139573, + "learning_rate": 5.861855235948327e-07, + "loss": 0.5621, + "step": 11964 + }, + { + "epoch": 0.85, + "grad_norm": 1.8823160819244178, + "learning_rate": 5.856457399723314e-07, + "loss": 0.5593, + "step": 11965 + }, + { + "epoch": 0.85, + "grad_norm": 1.598334705149845, + "learning_rate": 5.851061895309812e-07, + "loss": 0.4806, + "step": 11966 + }, + { + "epoch": 0.85, + "grad_norm": 1.5842231052615492, + "learning_rate": 5.845668722992798e-07, + "loss": 0.5065, + "step": 11967 + }, + { + "epoch": 0.85, + "grad_norm": 1.6598353412845184, + "learning_rate": 5.8402778830572e-07, + "loss": 0.4621, + "step": 11968 + }, + { + "epoch": 0.85, + "grad_norm": 1.6793044472313043, + "learning_rate": 5.834889375787739e-07, + "loss": 0.5745, + "step": 11969 + }, + { + "epoch": 0.85, + "grad_norm": 1.5660511724563229, + "learning_rate": 5.829503201469083e-07, + "loss": 0.4452, + "step": 11970 + }, + { + "epoch": 0.85, + "grad_norm": 3.9328728721672395, + "learning_rate": 5.824119360385733e-07, + "loss": 0.5443, + "step": 11971 + }, + { + "epoch": 0.85, + "grad_norm": 1.741335355452342, + "learning_rate": 5.818737852822087e-07, + "loss": 0.5712, + "step": 11972 + }, + { + "epoch": 0.85, + "grad_norm": 1.796779552844296, + "learning_rate": 5.81335867906242e-07, + "loss": 0.5041, + "step": 11973 + }, + { + "epoch": 0.85, + "grad_norm": 2.1516935312292262, + "learning_rate": 5.807981839390858e-07, + "loss": 0.5378, + "step": 11974 + }, + { + "epoch": 0.85, + "grad_norm": 1.653289173553318, + "learning_rate": 5.802607334091459e-07, + "loss": 0.4443, + "step": 11975 + }, + { + "epoch": 0.85, + "grad_norm": 1.447520176777563, + "learning_rate": 5.797235163448089e-07, + "loss": 0.4965, + "step": 11976 + }, + { + "epoch": 0.85, + "grad_norm": 2.1663290283082044, + "learning_rate": 5.791865327744534e-07, + "loss": 0.5501, + "step": 11977 + }, + { + "epoch": 0.85, + "grad_norm": 2.4800003915366915, + "learning_rate": 5.786497827264453e-07, + "loss": 0.6063, + "step": 11978 + }, + { + "epoch": 0.85, + "grad_norm": 1.631201711900396, + "learning_rate": 5.781132662291377e-07, + "loss": 0.5514, + "step": 11979 + }, + { + "epoch": 0.85, + "grad_norm": 1.6529748439031402, + "learning_rate": 5.775769833108713e-07, + "loss": 0.531, + "step": 11980 + }, + { + "epoch": 0.85, + "grad_norm": 1.7593629281339247, + "learning_rate": 5.770409339999728e-07, + "loss": 0.577, + "step": 11981 + }, + { + "epoch": 0.85, + "grad_norm": 1.6046544442584851, + "learning_rate": 5.765051183247595e-07, + "loss": 0.5123, + "step": 11982 + }, + { + "epoch": 0.85, + "grad_norm": 1.781066901579231, + "learning_rate": 5.759695363135342e-07, + "loss": 0.4909, + "step": 11983 + }, + { + "epoch": 0.85, + "grad_norm": 1.7314072889320744, + "learning_rate": 5.75434187994589e-07, + "loss": 0.5058, + "step": 11984 + }, + { + "epoch": 0.85, + "grad_norm": 1.7385292732205877, + "learning_rate": 5.74899073396204e-07, + "loss": 0.4949, + "step": 11985 + }, + { + "epoch": 0.85, + "grad_norm": 2.173556611117297, + "learning_rate": 5.743641925466415e-07, + "loss": 0.5381, + "step": 11986 + }, + { + "epoch": 0.85, + "grad_norm": 1.7658006890739704, + "learning_rate": 5.738295454741605e-07, + "loss": 0.5457, + "step": 11987 + }, + { + "epoch": 0.85, + "grad_norm": 2.160098429811828, + "learning_rate": 5.732951322070001e-07, + "loss": 0.5662, + "step": 11988 + }, + { + "epoch": 0.85, + "grad_norm": 1.763675154752843, + "learning_rate": 5.727609527733902e-07, + "loss": 0.511, + "step": 11989 + }, + { + "epoch": 0.85, + "grad_norm": 2.046754691274286, + "learning_rate": 5.722270072015485e-07, + "loss": 0.5356, + "step": 11990 + }, + { + "epoch": 0.85, + "grad_norm": 2.6312526699710177, + "learning_rate": 5.716932955196808e-07, + "loss": 0.5303, + "step": 11991 + }, + { + "epoch": 0.85, + "grad_norm": 1.5787063483059043, + "learning_rate": 5.711598177559774e-07, + "loss": 0.5564, + "step": 11992 + }, + { + "epoch": 0.85, + "grad_norm": 1.5721386062010998, + "learning_rate": 5.70626573938618e-07, + "loss": 0.5092, + "step": 11993 + }, + { + "epoch": 0.85, + "grad_norm": 1.6827894632927107, + "learning_rate": 5.700935640957739e-07, + "loss": 0.5648, + "step": 11994 + }, + { + "epoch": 0.85, + "grad_norm": 1.6602957237472147, + "learning_rate": 5.695607882555976e-07, + "loss": 0.5289, + "step": 11995 + }, + { + "epoch": 0.85, + "grad_norm": 1.5933598806148763, + "learning_rate": 5.690282464462332e-07, + "loss": 0.4985, + "step": 11996 + }, + { + "epoch": 0.85, + "grad_norm": 1.5048600581733191, + "learning_rate": 5.684959386958094e-07, + "loss": 0.4492, + "step": 11997 + }, + { + "epoch": 0.85, + "grad_norm": 1.881140348018367, + "learning_rate": 5.679638650324481e-07, + "loss": 0.5342, + "step": 11998 + }, + { + "epoch": 0.85, + "grad_norm": 1.8520708976491924, + "learning_rate": 5.674320254842524e-07, + "loss": 0.606, + "step": 11999 + }, + { + "epoch": 0.85, + "grad_norm": 1.766322833678862, + "learning_rate": 5.669004200793166e-07, + "loss": 0.4552, + "step": 12000 + }, + { + "epoch": 0.85, + "grad_norm": 1.6966724404555678, + "learning_rate": 5.663690488457219e-07, + "loss": 0.4836, + "step": 12001 + }, + { + "epoch": 0.85, + "grad_norm": 1.5793062973316243, + "learning_rate": 5.65837911811537e-07, + "loss": 0.5246, + "step": 12002 + }, + { + "epoch": 0.85, + "grad_norm": 3.476583609600949, + "learning_rate": 5.653070090048202e-07, + "loss": 0.5093, + "step": 12003 + }, + { + "epoch": 0.85, + "grad_norm": 0.6700526545156688, + "learning_rate": 5.647763404536117e-07, + "loss": 0.4346, + "step": 12004 + }, + { + "epoch": 0.85, + "grad_norm": 1.5862861157103307, + "learning_rate": 5.642459061859474e-07, + "loss": 0.5377, + "step": 12005 + }, + { + "epoch": 0.85, + "grad_norm": 2.2773895841279717, + "learning_rate": 5.637157062298438e-07, + "loss": 0.4951, + "step": 12006 + }, + { + "epoch": 0.85, + "grad_norm": 1.7402309532473723, + "learning_rate": 5.631857406133084e-07, + "loss": 0.5687, + "step": 12007 + }, + { + "epoch": 0.85, + "grad_norm": 1.5248248062781713, + "learning_rate": 5.626560093643368e-07, + "loss": 0.5123, + "step": 12008 + }, + { + "epoch": 0.85, + "grad_norm": 1.7192937453511143, + "learning_rate": 5.621265125109099e-07, + "loss": 0.5385, + "step": 12009 + }, + { + "epoch": 0.85, + "grad_norm": 1.7860101084030808, + "learning_rate": 5.615972500809997e-07, + "loss": 0.4765, + "step": 12010 + }, + { + "epoch": 0.85, + "grad_norm": 1.9940093827387109, + "learning_rate": 5.610682221025609e-07, + "loss": 0.4845, + "step": 12011 + }, + { + "epoch": 0.85, + "grad_norm": 1.920110984690365, + "learning_rate": 5.605394286035398e-07, + "loss": 0.5063, + "step": 12012 + }, + { + "epoch": 0.85, + "grad_norm": 2.136361564563695, + "learning_rate": 5.600108696118689e-07, + "loss": 0.4839, + "step": 12013 + }, + { + "epoch": 0.85, + "grad_norm": 1.680940328562352, + "learning_rate": 5.594825451554687e-07, + "loss": 0.5766, + "step": 12014 + }, + { + "epoch": 0.85, + "grad_norm": 0.6913431326788476, + "learning_rate": 5.589544552622483e-07, + "loss": 0.4231, + "step": 12015 + }, + { + "epoch": 0.85, + "grad_norm": 1.6388068964632823, + "learning_rate": 5.584265999600996e-07, + "loss": 0.5978, + "step": 12016 + }, + { + "epoch": 0.85, + "grad_norm": 2.148902186931401, + "learning_rate": 5.578989792769102e-07, + "loss": 0.5397, + "step": 12017 + }, + { + "epoch": 0.85, + "grad_norm": 1.7496795071141151, + "learning_rate": 5.573715932405471e-07, + "loss": 0.5614, + "step": 12018 + }, + { + "epoch": 0.85, + "grad_norm": 2.6402507635266943, + "learning_rate": 5.568444418788715e-07, + "loss": 0.4835, + "step": 12019 + }, + { + "epoch": 0.85, + "grad_norm": 1.558014625293106, + "learning_rate": 5.563175252197256e-07, + "loss": 0.5111, + "step": 12020 + }, + { + "epoch": 0.85, + "grad_norm": 1.575170404466334, + "learning_rate": 5.55790843290947e-07, + "loss": 0.4755, + "step": 12021 + }, + { + "epoch": 0.85, + "grad_norm": 0.6657532218483908, + "learning_rate": 5.552643961203553e-07, + "loss": 0.4237, + "step": 12022 + }, + { + "epoch": 0.85, + "grad_norm": 1.5179121504360753, + "learning_rate": 5.547381837357574e-07, + "loss": 0.4417, + "step": 12023 + }, + { + "epoch": 0.85, + "grad_norm": 2.6178987988636546, + "learning_rate": 5.542122061649536e-07, + "loss": 0.5814, + "step": 12024 + }, + { + "epoch": 0.85, + "grad_norm": 1.7654513317867686, + "learning_rate": 5.536864634357236e-07, + "loss": 0.6093, + "step": 12025 + }, + { + "epoch": 0.85, + "grad_norm": 1.5524783630614913, + "learning_rate": 5.531609555758422e-07, + "loss": 0.5169, + "step": 12026 + }, + { + "epoch": 0.85, + "grad_norm": 1.909191525556932, + "learning_rate": 5.526356826130653e-07, + "loss": 0.4935, + "step": 12027 + }, + { + "epoch": 0.85, + "grad_norm": 2.0994209094052705, + "learning_rate": 5.521106445751434e-07, + "loss": 0.5474, + "step": 12028 + }, + { + "epoch": 0.85, + "grad_norm": 0.6983958537719402, + "learning_rate": 5.515858414898073e-07, + "loss": 0.412, + "step": 12029 + }, + { + "epoch": 0.85, + "grad_norm": 0.7928440357945047, + "learning_rate": 5.510612733847803e-07, + "loss": 0.4348, + "step": 12030 + }, + { + "epoch": 0.85, + "grad_norm": 1.4664762331090024, + "learning_rate": 5.505369402877725e-07, + "loss": 0.5151, + "step": 12031 + }, + { + "epoch": 0.85, + "grad_norm": 1.6898242897804663, + "learning_rate": 5.5001284222648e-07, + "loss": 0.5296, + "step": 12032 + }, + { + "epoch": 0.85, + "grad_norm": 1.5250434961707768, + "learning_rate": 5.49488979228589e-07, + "loss": 0.4573, + "step": 12033 + }, + { + "epoch": 0.85, + "grad_norm": 2.103412785083146, + "learning_rate": 5.489653513217691e-07, + "loss": 0.5195, + "step": 12034 + }, + { + "epoch": 0.85, + "grad_norm": 1.641010230967123, + "learning_rate": 5.484419585336814e-07, + "loss": 0.5095, + "step": 12035 + }, + { + "epoch": 0.85, + "grad_norm": 1.7552337280410197, + "learning_rate": 5.47918800891974e-07, + "loss": 0.4962, + "step": 12036 + }, + { + "epoch": 0.85, + "grad_norm": 1.7317893223176797, + "learning_rate": 5.473958784242812e-07, + "loss": 0.4386, + "step": 12037 + }, + { + "epoch": 0.85, + "grad_norm": 1.4480586389367875, + "learning_rate": 5.468731911582254e-07, + "loss": 0.5062, + "step": 12038 + }, + { + "epoch": 0.85, + "grad_norm": 0.6754492727268356, + "learning_rate": 5.463507391214168e-07, + "loss": 0.4174, + "step": 12039 + }, + { + "epoch": 0.85, + "grad_norm": 1.8190524211184038, + "learning_rate": 5.458285223414539e-07, + "loss": 0.5299, + "step": 12040 + }, + { + "epoch": 0.85, + "grad_norm": 1.4856174208518556, + "learning_rate": 5.453065408459207e-07, + "loss": 0.4258, + "step": 12041 + }, + { + "epoch": 0.85, + "grad_norm": 1.784321253695816, + "learning_rate": 5.447847946623902e-07, + "loss": 0.5175, + "step": 12042 + }, + { + "epoch": 0.85, + "grad_norm": 1.9916579404944152, + "learning_rate": 5.442632838184236e-07, + "loss": 0.5598, + "step": 12043 + }, + { + "epoch": 0.85, + "grad_norm": 1.8033688602047737, + "learning_rate": 5.437420083415685e-07, + "loss": 0.5502, + "step": 12044 + }, + { + "epoch": 0.85, + "grad_norm": 1.556711842674976, + "learning_rate": 5.432209682593608e-07, + "loss": 0.4807, + "step": 12045 + }, + { + "epoch": 0.85, + "grad_norm": 1.6746330320669653, + "learning_rate": 5.427001635993217e-07, + "loss": 0.4666, + "step": 12046 + }, + { + "epoch": 0.85, + "grad_norm": 1.6629748677795093, + "learning_rate": 5.421795943889652e-07, + "loss": 0.4699, + "step": 12047 + }, + { + "epoch": 0.85, + "grad_norm": 1.5964575465911919, + "learning_rate": 5.416592606557863e-07, + "loss": 0.4887, + "step": 12048 + }, + { + "epoch": 0.86, + "grad_norm": 1.8567422656838277, + "learning_rate": 5.411391624272727e-07, + "loss": 0.5255, + "step": 12049 + }, + { + "epoch": 0.86, + "grad_norm": 1.8014161062785252, + "learning_rate": 5.406192997308973e-07, + "loss": 0.5168, + "step": 12050 + }, + { + "epoch": 0.86, + "grad_norm": 1.6256615920801798, + "learning_rate": 5.400996725941205e-07, + "loss": 0.512, + "step": 12051 + }, + { + "epoch": 0.86, + "grad_norm": 1.7403596764645173, + "learning_rate": 5.395802810443923e-07, + "loss": 0.5603, + "step": 12052 + }, + { + "epoch": 0.86, + "grad_norm": 1.6584507749108262, + "learning_rate": 5.39061125109146e-07, + "loss": 0.5452, + "step": 12053 + }, + { + "epoch": 0.86, + "grad_norm": 1.823114153572426, + "learning_rate": 5.385422048158085e-07, + "loss": 0.6018, + "step": 12054 + }, + { + "epoch": 0.86, + "grad_norm": 2.4286725902888806, + "learning_rate": 5.380235201917882e-07, + "loss": 0.511, + "step": 12055 + }, + { + "epoch": 0.86, + "grad_norm": 2.3663443547860146, + "learning_rate": 5.375050712644858e-07, + "loss": 0.5802, + "step": 12056 + }, + { + "epoch": 0.86, + "grad_norm": 1.7460093394806389, + "learning_rate": 5.369868580612842e-07, + "loss": 0.558, + "step": 12057 + }, + { + "epoch": 0.86, + "grad_norm": 0.6998336888058949, + "learning_rate": 5.364688806095609e-07, + "loss": 0.4179, + "step": 12058 + }, + { + "epoch": 0.86, + "grad_norm": 2.1630715981600535, + "learning_rate": 5.359511389366767e-07, + "loss": 0.4901, + "step": 12059 + }, + { + "epoch": 0.86, + "grad_norm": 1.6441422860491586, + "learning_rate": 5.354336330699788e-07, + "loss": 0.5737, + "step": 12060 + }, + { + "epoch": 0.86, + "grad_norm": 1.8662961592034863, + "learning_rate": 5.349163630368042e-07, + "loss": 0.5537, + "step": 12061 + }, + { + "epoch": 0.86, + "grad_norm": 2.0962281472131985, + "learning_rate": 5.343993288644777e-07, + "loss": 0.4396, + "step": 12062 + }, + { + "epoch": 0.86, + "grad_norm": 1.61177359644906, + "learning_rate": 5.338825305803102e-07, + "loss": 0.468, + "step": 12063 + }, + { + "epoch": 0.86, + "grad_norm": 2.0672792530757085, + "learning_rate": 5.333659682116004e-07, + "loss": 0.5515, + "step": 12064 + }, + { + "epoch": 0.86, + "grad_norm": 1.9013734072368138, + "learning_rate": 5.328496417856349e-07, + "loss": 0.5187, + "step": 12065 + }, + { + "epoch": 0.86, + "grad_norm": 1.5294908486908725, + "learning_rate": 5.323335513296884e-07, + "loss": 0.4935, + "step": 12066 + }, + { + "epoch": 0.86, + "grad_norm": 1.9246820219887248, + "learning_rate": 5.31817696871022e-07, + "loss": 0.5141, + "step": 12067 + }, + { + "epoch": 0.86, + "grad_norm": 0.7259973959079472, + "learning_rate": 5.313020784368861e-07, + "loss": 0.461, + "step": 12068 + }, + { + "epoch": 0.86, + "grad_norm": 1.6236963469958938, + "learning_rate": 5.307866960545144e-07, + "loss": 0.5257, + "step": 12069 + }, + { + "epoch": 0.86, + "grad_norm": 0.7099428569771515, + "learning_rate": 5.302715497511351e-07, + "loss": 0.3998, + "step": 12070 + }, + { + "epoch": 0.86, + "grad_norm": 1.9952794355632641, + "learning_rate": 5.297566395539571e-07, + "loss": 0.5733, + "step": 12071 + }, + { + "epoch": 0.86, + "grad_norm": 1.6044162405945104, + "learning_rate": 5.29241965490181e-07, + "loss": 0.476, + "step": 12072 + }, + { + "epoch": 0.86, + "grad_norm": 1.5679247108505296, + "learning_rate": 5.28727527586993e-07, + "loss": 0.5269, + "step": 12073 + }, + { + "epoch": 0.86, + "grad_norm": 1.9704135839261498, + "learning_rate": 5.282133258715677e-07, + "loss": 0.5683, + "step": 12074 + }, + { + "epoch": 0.86, + "grad_norm": 2.309242749017236, + "learning_rate": 5.276993603710678e-07, + "loss": 0.5363, + "step": 12075 + }, + { + "epoch": 0.86, + "grad_norm": 1.7193020503878274, + "learning_rate": 5.2718563111264e-07, + "loss": 0.532, + "step": 12076 + }, + { + "epoch": 0.86, + "grad_norm": 1.7178981824215593, + "learning_rate": 5.266721381234257e-07, + "loss": 0.5285, + "step": 12077 + }, + { + "epoch": 0.86, + "grad_norm": 0.629481930654937, + "learning_rate": 5.261588814305452e-07, + "loss": 0.4033, + "step": 12078 + }, + { + "epoch": 0.86, + "grad_norm": 2.1712144146576677, + "learning_rate": 5.256458610611121e-07, + "loss": 0.5291, + "step": 12079 + }, + { + "epoch": 0.86, + "grad_norm": 2.427262181699948, + "learning_rate": 5.251330770422258e-07, + "loss": 0.589, + "step": 12080 + }, + { + "epoch": 0.86, + "grad_norm": 1.8820242009404307, + "learning_rate": 5.246205294009732e-07, + "loss": 0.5146, + "step": 12081 + }, + { + "epoch": 0.86, + "grad_norm": 1.9702102334884573, + "learning_rate": 5.241082181644302e-07, + "loss": 0.4971, + "step": 12082 + }, + { + "epoch": 0.86, + "grad_norm": 1.7184838730810563, + "learning_rate": 5.235961433596565e-07, + "loss": 0.5211, + "step": 12083 + }, + { + "epoch": 0.86, + "grad_norm": 1.6681081441983008, + "learning_rate": 5.230843050137024e-07, + "loss": 0.5036, + "step": 12084 + }, + { + "epoch": 0.86, + "grad_norm": 1.5312797730920813, + "learning_rate": 5.225727031536055e-07, + "loss": 0.4887, + "step": 12085 + }, + { + "epoch": 0.86, + "grad_norm": 1.7656844943859566, + "learning_rate": 5.220613378063893e-07, + "loss": 0.5046, + "step": 12086 + }, + { + "epoch": 0.86, + "grad_norm": 1.655327749347915, + "learning_rate": 5.21550208999067e-07, + "loss": 0.4994, + "step": 12087 + }, + { + "epoch": 0.86, + "grad_norm": 7.020194081515909, + "learning_rate": 5.21039316758638e-07, + "loss": 0.5245, + "step": 12088 + }, + { + "epoch": 0.86, + "grad_norm": 1.709507317574151, + "learning_rate": 5.205286611120897e-07, + "loss": 0.4874, + "step": 12089 + }, + { + "epoch": 0.86, + "grad_norm": 1.747767822812459, + "learning_rate": 5.200182420863952e-07, + "loss": 0.4893, + "step": 12090 + }, + { + "epoch": 0.86, + "grad_norm": 1.5865999789820204, + "learning_rate": 5.195080597085178e-07, + "loss": 0.4697, + "step": 12091 + }, + { + "epoch": 0.86, + "grad_norm": 1.83209616480102, + "learning_rate": 5.189981140054063e-07, + "loss": 0.5203, + "step": 12092 + }, + { + "epoch": 0.86, + "grad_norm": 2.4771489892692724, + "learning_rate": 5.18488405003999e-07, + "loss": 0.5805, + "step": 12093 + }, + { + "epoch": 0.86, + "grad_norm": 1.7656458112427895, + "learning_rate": 5.179789327312184e-07, + "loss": 0.5586, + "step": 12094 + }, + { + "epoch": 0.86, + "grad_norm": 2.0143316911300655, + "learning_rate": 5.174696972139781e-07, + "loss": 0.5097, + "step": 12095 + }, + { + "epoch": 0.86, + "grad_norm": 1.7383149672425786, + "learning_rate": 5.16960698479177e-07, + "loss": 0.5509, + "step": 12096 + }, + { + "epoch": 0.86, + "grad_norm": 1.5306551092838805, + "learning_rate": 5.164519365537025e-07, + "loss": 0.5233, + "step": 12097 + }, + { + "epoch": 0.86, + "grad_norm": 1.675045905313407, + "learning_rate": 5.159434114644302e-07, + "loss": 0.5852, + "step": 12098 + }, + { + "epoch": 0.86, + "grad_norm": 1.6799394272277186, + "learning_rate": 5.154351232382188e-07, + "loss": 0.5636, + "step": 12099 + }, + { + "epoch": 0.86, + "grad_norm": 1.7960446852641612, + "learning_rate": 5.149270719019223e-07, + "loss": 0.6451, + "step": 12100 + }, + { + "epoch": 0.86, + "grad_norm": 1.714993917872733, + "learning_rate": 5.144192574823742e-07, + "loss": 0.5519, + "step": 12101 + }, + { + "epoch": 0.86, + "grad_norm": 1.6761722902531901, + "learning_rate": 5.139116800063998e-07, + "loss": 0.4248, + "step": 12102 + }, + { + "epoch": 0.86, + "grad_norm": 1.7825219133398151, + "learning_rate": 5.13404339500812e-07, + "loss": 0.5394, + "step": 12103 + }, + { + "epoch": 0.86, + "grad_norm": 1.5716209153934202, + "learning_rate": 5.128972359924089e-07, + "loss": 0.481, + "step": 12104 + }, + { + "epoch": 0.86, + "grad_norm": 1.971697522478389, + "learning_rate": 5.123903695079796e-07, + "loss": 0.5165, + "step": 12105 + }, + { + "epoch": 0.86, + "grad_norm": 1.8548097920168471, + "learning_rate": 5.118837400742954e-07, + "loss": 0.5681, + "step": 12106 + }, + { + "epoch": 0.86, + "grad_norm": 1.7539230019943948, + "learning_rate": 5.113773477181216e-07, + "loss": 0.4872, + "step": 12107 + }, + { + "epoch": 0.86, + "grad_norm": 1.5255602134477608, + "learning_rate": 5.108711924662047e-07, + "loss": 0.4749, + "step": 12108 + }, + { + "epoch": 0.86, + "grad_norm": 1.5014163171506343, + "learning_rate": 5.103652743452824e-07, + "loss": 0.5196, + "step": 12109 + }, + { + "epoch": 0.86, + "grad_norm": 1.6893249124076233, + "learning_rate": 5.098595933820799e-07, + "loss": 0.4927, + "step": 12110 + }, + { + "epoch": 0.86, + "grad_norm": 2.487606515304919, + "learning_rate": 5.093541496033077e-07, + "loss": 0.517, + "step": 12111 + }, + { + "epoch": 0.86, + "grad_norm": 3.450134089478849, + "learning_rate": 5.088489430356675e-07, + "loss": 0.5933, + "step": 12112 + }, + { + "epoch": 0.86, + "grad_norm": 1.720673500365488, + "learning_rate": 5.083439737058426e-07, + "loss": 0.5951, + "step": 12113 + }, + { + "epoch": 0.86, + "grad_norm": 2.9764661887884247, + "learning_rate": 5.078392416405092e-07, + "loss": 0.5001, + "step": 12114 + }, + { + "epoch": 0.86, + "grad_norm": 0.7084252367065852, + "learning_rate": 5.073347468663281e-07, + "loss": 0.432, + "step": 12115 + }, + { + "epoch": 0.86, + "grad_norm": 1.9012352673683215, + "learning_rate": 5.06830489409949e-07, + "loss": 0.4794, + "step": 12116 + }, + { + "epoch": 0.86, + "grad_norm": 1.934534961171081, + "learning_rate": 5.063264692980085e-07, + "loss": 0.4781, + "step": 12117 + }, + { + "epoch": 0.86, + "grad_norm": 1.5942180535648045, + "learning_rate": 5.058226865571309e-07, + "loss": 0.5353, + "step": 12118 + }, + { + "epoch": 0.86, + "grad_norm": 1.547990741499169, + "learning_rate": 5.053191412139279e-07, + "loss": 0.4611, + "step": 12119 + }, + { + "epoch": 0.86, + "grad_norm": 1.4751136971484011, + "learning_rate": 5.048158332949971e-07, + "loss": 0.5189, + "step": 12120 + }, + { + "epoch": 0.86, + "grad_norm": 2.0279100116624313, + "learning_rate": 5.043127628269257e-07, + "loss": 0.4304, + "step": 12121 + }, + { + "epoch": 0.86, + "grad_norm": 0.717671125696523, + "learning_rate": 5.038099298362876e-07, + "loss": 0.4499, + "step": 12122 + }, + { + "epoch": 0.86, + "grad_norm": 1.8596612724278592, + "learning_rate": 5.033073343496453e-07, + "loss": 0.505, + "step": 12123 + }, + { + "epoch": 0.86, + "grad_norm": 1.5724992404653202, + "learning_rate": 5.028049763935455e-07, + "loss": 0.534, + "step": 12124 + }, + { + "epoch": 0.86, + "grad_norm": 2.579980142088563, + "learning_rate": 5.023028559945242e-07, + "loss": 0.5759, + "step": 12125 + }, + { + "epoch": 0.86, + "grad_norm": 1.8369410938202544, + "learning_rate": 5.018009731791084e-07, + "loss": 0.5286, + "step": 12126 + }, + { + "epoch": 0.86, + "grad_norm": 1.53515209216614, + "learning_rate": 5.01299327973806e-07, + "loss": 0.4386, + "step": 12127 + }, + { + "epoch": 0.86, + "grad_norm": 1.5529780041185468, + "learning_rate": 5.007979204051183e-07, + "loss": 0.4763, + "step": 12128 + }, + { + "epoch": 0.86, + "grad_norm": 3.1611284036240366, + "learning_rate": 5.002967504995271e-07, + "loss": 0.4622, + "step": 12129 + }, + { + "epoch": 0.86, + "grad_norm": 1.589932854497379, + "learning_rate": 4.997958182835111e-07, + "loss": 0.5161, + "step": 12130 + }, + { + "epoch": 0.86, + "grad_norm": 1.6825136586889002, + "learning_rate": 4.99295123783528e-07, + "loss": 0.4651, + "step": 12131 + }, + { + "epoch": 0.86, + "grad_norm": 1.7666139925499125, + "learning_rate": 4.987946670260263e-07, + "loss": 0.5054, + "step": 12132 + }, + { + "epoch": 0.86, + "grad_norm": 1.8985189461266954, + "learning_rate": 4.98294448037443e-07, + "loss": 0.5659, + "step": 12133 + }, + { + "epoch": 0.86, + "grad_norm": 1.697755180128862, + "learning_rate": 4.977944668442003e-07, + "loss": 0.4916, + "step": 12134 + }, + { + "epoch": 0.86, + "grad_norm": 1.657827573498729, + "learning_rate": 4.97294723472711e-07, + "loss": 0.5343, + "step": 12135 + }, + { + "epoch": 0.86, + "grad_norm": 2.4648167673068264, + "learning_rate": 4.96795217949369e-07, + "loss": 0.5635, + "step": 12136 + }, + { + "epoch": 0.86, + "grad_norm": 1.5133949060819472, + "learning_rate": 4.962959503005649e-07, + "loss": 0.4406, + "step": 12137 + }, + { + "epoch": 0.86, + "grad_norm": 1.7895249187527886, + "learning_rate": 4.957969205526686e-07, + "loss": 0.5649, + "step": 12138 + }, + { + "epoch": 0.86, + "grad_norm": 1.5133005982878787, + "learning_rate": 4.952981287320407e-07, + "loss": 0.418, + "step": 12139 + }, + { + "epoch": 0.86, + "grad_norm": 1.635699707122845, + "learning_rate": 4.947995748650302e-07, + "loss": 0.5516, + "step": 12140 + }, + { + "epoch": 0.86, + "grad_norm": 1.9715144815710928, + "learning_rate": 4.943012589779716e-07, + "loss": 0.5206, + "step": 12141 + }, + { + "epoch": 0.86, + "grad_norm": 1.598388057457011, + "learning_rate": 4.938031810971888e-07, + "loss": 0.4724, + "step": 12142 + }, + { + "epoch": 0.86, + "grad_norm": 1.8273366052974582, + "learning_rate": 4.933053412489902e-07, + "loss": 0.5691, + "step": 12143 + }, + { + "epoch": 0.86, + "grad_norm": 1.5825041862283393, + "learning_rate": 4.928077394596742e-07, + "loss": 0.4957, + "step": 12144 + }, + { + "epoch": 0.86, + "grad_norm": 1.8447191952001432, + "learning_rate": 4.923103757555258e-07, + "loss": 0.5366, + "step": 12145 + }, + { + "epoch": 0.86, + "grad_norm": 2.060233920792918, + "learning_rate": 4.918132501628175e-07, + "loss": 0.5439, + "step": 12146 + }, + { + "epoch": 0.86, + "grad_norm": 1.6660802609296108, + "learning_rate": 4.913163627078099e-07, + "loss": 0.5432, + "step": 12147 + }, + { + "epoch": 0.86, + "grad_norm": 1.5470866122301923, + "learning_rate": 4.90819713416748e-07, + "loss": 0.4659, + "step": 12148 + }, + { + "epoch": 0.86, + "grad_norm": 0.7241297530832049, + "learning_rate": 4.903233023158698e-07, + "loss": 0.4087, + "step": 12149 + }, + { + "epoch": 0.86, + "grad_norm": 1.685307021917687, + "learning_rate": 4.898271294313944e-07, + "loss": 0.5857, + "step": 12150 + }, + { + "epoch": 0.86, + "grad_norm": 0.6678768821126596, + "learning_rate": 4.89331194789533e-07, + "loss": 0.4349, + "step": 12151 + }, + { + "epoch": 0.86, + "grad_norm": 0.6772124036922401, + "learning_rate": 4.888354984164823e-07, + "loss": 0.4568, + "step": 12152 + }, + { + "epoch": 0.86, + "grad_norm": 1.576651885600271, + "learning_rate": 4.883400403384259e-07, + "loss": 0.5131, + "step": 12153 + }, + { + "epoch": 0.86, + "grad_norm": 2.2211948321094903, + "learning_rate": 4.878448205815372e-07, + "loss": 0.4455, + "step": 12154 + }, + { + "epoch": 0.86, + "grad_norm": 1.5587162238148649, + "learning_rate": 4.873498391719728e-07, + "loss": 0.5154, + "step": 12155 + }, + { + "epoch": 0.86, + "grad_norm": 1.6161597679007669, + "learning_rate": 4.868550961358825e-07, + "loss": 0.5306, + "step": 12156 + }, + { + "epoch": 0.86, + "grad_norm": 3.3085015321875897, + "learning_rate": 4.863605914993979e-07, + "loss": 0.5806, + "step": 12157 + }, + { + "epoch": 0.86, + "grad_norm": 2.6144151940945757, + "learning_rate": 4.858663252886419e-07, + "loss": 0.556, + "step": 12158 + }, + { + "epoch": 0.86, + "grad_norm": 1.647844550256474, + "learning_rate": 4.853722975297209e-07, + "loss": 0.5075, + "step": 12159 + }, + { + "epoch": 0.86, + "grad_norm": 1.725145035576416, + "learning_rate": 4.848785082487345e-07, + "loss": 0.4851, + "step": 12160 + }, + { + "epoch": 0.86, + "grad_norm": 2.0257575725206105, + "learning_rate": 4.843849574717635e-07, + "loss": 0.5087, + "step": 12161 + }, + { + "epoch": 0.86, + "grad_norm": 1.4482759857354528, + "learning_rate": 4.838916452248804e-07, + "loss": 0.5182, + "step": 12162 + }, + { + "epoch": 0.86, + "grad_norm": 3.8689034669336997, + "learning_rate": 4.833985715341427e-07, + "loss": 0.5332, + "step": 12163 + }, + { + "epoch": 0.86, + "grad_norm": 1.43619520340393, + "learning_rate": 4.829057364255973e-07, + "loss": 0.5224, + "step": 12164 + }, + { + "epoch": 0.86, + "grad_norm": 2.2270340663767403, + "learning_rate": 4.82413139925278e-07, + "loss": 0.5107, + "step": 12165 + }, + { + "epoch": 0.86, + "grad_norm": 1.716543905864648, + "learning_rate": 4.81920782059202e-07, + "loss": 0.4671, + "step": 12166 + }, + { + "epoch": 0.86, + "grad_norm": 1.503242822941123, + "learning_rate": 4.814286628533815e-07, + "loss": 0.5388, + "step": 12167 + }, + { + "epoch": 0.86, + "grad_norm": 2.4207722056783836, + "learning_rate": 4.809367823338096e-07, + "loss": 0.5674, + "step": 12168 + }, + { + "epoch": 0.86, + "grad_norm": 1.7530497405773005, + "learning_rate": 4.804451405264692e-07, + "loss": 0.4925, + "step": 12169 + }, + { + "epoch": 0.86, + "grad_norm": 2.140599674070669, + "learning_rate": 4.799537374573304e-07, + "loss": 0.5268, + "step": 12170 + }, + { + "epoch": 0.86, + "grad_norm": 1.569107162677777, + "learning_rate": 4.794625731523517e-07, + "loss": 0.5185, + "step": 12171 + }, + { + "epoch": 0.86, + "grad_norm": 1.620896084807001, + "learning_rate": 4.789716476374784e-07, + "loss": 0.5122, + "step": 12172 + }, + { + "epoch": 0.86, + "grad_norm": 1.5041944784747527, + "learning_rate": 4.784809609386415e-07, + "loss": 0.4732, + "step": 12173 + }, + { + "epoch": 0.86, + "grad_norm": 1.5013790616513638, + "learning_rate": 4.779905130817608e-07, + "loss": 0.5438, + "step": 12174 + }, + { + "epoch": 0.86, + "grad_norm": 1.5352456299231325, + "learning_rate": 4.77500304092744e-07, + "loss": 0.4751, + "step": 12175 + }, + { + "epoch": 0.86, + "grad_norm": 1.5387240448379724, + "learning_rate": 4.770103339974852e-07, + "loss": 0.4961, + "step": 12176 + }, + { + "epoch": 0.86, + "grad_norm": 1.591140801312342, + "learning_rate": 4.7652060282186775e-07, + "loss": 0.5523, + "step": 12177 + }, + { + "epoch": 0.86, + "grad_norm": 1.6424000696599224, + "learning_rate": 4.7603111059175756e-07, + "loss": 0.4376, + "step": 12178 + }, + { + "epoch": 0.86, + "grad_norm": 1.9327883253462246, + "learning_rate": 4.755418573330151e-07, + "loss": 0.5095, + "step": 12179 + }, + { + "epoch": 0.86, + "grad_norm": 1.8885801303110779, + "learning_rate": 4.750528430714824e-07, + "loss": 0.5017, + "step": 12180 + }, + { + "epoch": 0.86, + "grad_norm": 2.256980254772972, + "learning_rate": 4.745640678329905e-07, + "loss": 0.5392, + "step": 12181 + }, + { + "epoch": 0.86, + "grad_norm": 1.8145944241286276, + "learning_rate": 4.740755316433593e-07, + "loss": 0.553, + "step": 12182 + }, + { + "epoch": 0.86, + "grad_norm": 1.8397057471017302, + "learning_rate": 4.7358723452839426e-07, + "loss": 0.439, + "step": 12183 + }, + { + "epoch": 0.86, + "grad_norm": 1.9499841666007993, + "learning_rate": 4.730991765138898e-07, + "loss": 0.506, + "step": 12184 + }, + { + "epoch": 0.86, + "grad_norm": 1.5753479520584128, + "learning_rate": 4.726113576256247e-07, + "loss": 0.5335, + "step": 12185 + }, + { + "epoch": 0.86, + "grad_norm": 2.1829950717224986, + "learning_rate": 4.7212377788937005e-07, + "loss": 0.559, + "step": 12186 + }, + { + "epoch": 0.86, + "grad_norm": 1.571909014710363, + "learning_rate": 4.7163643733087907e-07, + "loss": 0.568, + "step": 12187 + }, + { + "epoch": 0.86, + "grad_norm": 1.6727449077048773, + "learning_rate": 4.7114933597589673e-07, + "loss": 0.5323, + "step": 12188 + }, + { + "epoch": 0.86, + "grad_norm": 0.7738311036565972, + "learning_rate": 4.7066247385015027e-07, + "loss": 0.4164, + "step": 12189 + }, + { + "epoch": 0.87, + "grad_norm": 1.738045667402, + "learning_rate": 4.701758509793608e-07, + "loss": 0.56, + "step": 12190 + }, + { + "epoch": 0.87, + "grad_norm": 1.7779965836390155, + "learning_rate": 4.696894673892327e-07, + "loss": 0.504, + "step": 12191 + }, + { + "epoch": 0.87, + "grad_norm": 1.8310565899920823, + "learning_rate": 4.692033231054566e-07, + "loss": 0.5157, + "step": 12192 + }, + { + "epoch": 0.87, + "grad_norm": 2.140515636280888, + "learning_rate": 4.6871741815371354e-07, + "loss": 0.5489, + "step": 12193 + }, + { + "epoch": 0.87, + "grad_norm": 1.6909652336897303, + "learning_rate": 4.6823175255967025e-07, + "loss": 0.5552, + "step": 12194 + }, + { + "epoch": 0.87, + "grad_norm": 1.6428281737923989, + "learning_rate": 4.6774632634898287e-07, + "loss": 0.504, + "step": 12195 + }, + { + "epoch": 0.87, + "grad_norm": 5.233064281697578, + "learning_rate": 4.6726113954729093e-07, + "loss": 0.4851, + "step": 12196 + }, + { + "epoch": 0.87, + "grad_norm": 2.4278839710414037, + "learning_rate": 4.667761921802244e-07, + "loss": 0.5335, + "step": 12197 + }, + { + "epoch": 0.87, + "grad_norm": 1.822449788922429, + "learning_rate": 4.662914842734001e-07, + "loss": 0.4602, + "step": 12198 + }, + { + "epoch": 0.87, + "grad_norm": 1.437880637952307, + "learning_rate": 4.658070158524219e-07, + "loss": 0.5095, + "step": 12199 + }, + { + "epoch": 0.87, + "grad_norm": 1.730726776520087, + "learning_rate": 4.653227869428817e-07, + "loss": 0.5485, + "step": 12200 + }, + { + "epoch": 0.87, + "grad_norm": 1.705346105941609, + "learning_rate": 4.648387975703567e-07, + "loss": 0.5518, + "step": 12201 + }, + { + "epoch": 0.87, + "grad_norm": 1.6647249562117743, + "learning_rate": 4.643550477604147e-07, + "loss": 0.5642, + "step": 12202 + }, + { + "epoch": 0.87, + "grad_norm": 1.69807256236346, + "learning_rate": 4.6387153753860713e-07, + "loss": 0.5527, + "step": 12203 + }, + { + "epoch": 0.87, + "grad_norm": 1.4948495098515497, + "learning_rate": 4.633882669304757e-07, + "loss": 0.4906, + "step": 12204 + }, + { + "epoch": 0.87, + "grad_norm": 1.9042973386029913, + "learning_rate": 4.629052359615477e-07, + "loss": 0.5882, + "step": 12205 + }, + { + "epoch": 0.87, + "grad_norm": 1.9993910482253405, + "learning_rate": 4.6242244465733887e-07, + "loss": 0.4814, + "step": 12206 + }, + { + "epoch": 0.87, + "grad_norm": 1.8624835543717628, + "learning_rate": 4.619398930433533e-07, + "loss": 0.4928, + "step": 12207 + }, + { + "epoch": 0.87, + "grad_norm": 6.807461723652015, + "learning_rate": 4.614575811450772e-07, + "loss": 0.5246, + "step": 12208 + }, + { + "epoch": 0.87, + "grad_norm": 1.721552280749168, + "learning_rate": 4.6097550898799246e-07, + "loss": 0.4994, + "step": 12209 + }, + { + "epoch": 0.87, + "grad_norm": 1.8041626559165236, + "learning_rate": 4.6049367659756095e-07, + "loss": 0.4952, + "step": 12210 + }, + { + "epoch": 0.87, + "grad_norm": 1.597638959299611, + "learning_rate": 4.600120839992356e-07, + "loss": 0.5036, + "step": 12211 + }, + { + "epoch": 0.87, + "grad_norm": 2.0856481033150116, + "learning_rate": 4.595307312184549e-07, + "loss": 0.4946, + "step": 12212 + }, + { + "epoch": 0.87, + "grad_norm": 1.934238513870674, + "learning_rate": 4.5904961828064644e-07, + "loss": 0.5673, + "step": 12213 + }, + { + "epoch": 0.87, + "grad_norm": 0.6861848302017461, + "learning_rate": 4.585687452112253e-07, + "loss": 0.4311, + "step": 12214 + }, + { + "epoch": 0.87, + "grad_norm": 1.602783972828645, + "learning_rate": 4.58088112035589e-07, + "loss": 0.5511, + "step": 12215 + }, + { + "epoch": 0.87, + "grad_norm": 1.8163133925421504, + "learning_rate": 4.5760771877913115e-07, + "loss": 0.4696, + "step": 12216 + }, + { + "epoch": 0.87, + "grad_norm": 1.5549107908748099, + "learning_rate": 4.571275654672236e-07, + "loss": 0.5314, + "step": 12217 + }, + { + "epoch": 0.87, + "grad_norm": 1.4682993704480658, + "learning_rate": 4.566476521252322e-07, + "loss": 0.5016, + "step": 12218 + }, + { + "epoch": 0.87, + "grad_norm": 1.5632442109196556, + "learning_rate": 4.561679787785067e-07, + "loss": 0.5414, + "step": 12219 + }, + { + "epoch": 0.87, + "grad_norm": 1.6368322537500342, + "learning_rate": 4.556885454523846e-07, + "loss": 0.5111, + "step": 12220 + }, + { + "epoch": 0.87, + "grad_norm": 1.7212020670434314, + "learning_rate": 4.552093521721934e-07, + "loss": 0.5288, + "step": 12221 + }, + { + "epoch": 0.87, + "grad_norm": 4.585935044380919, + "learning_rate": 4.5473039896324233e-07, + "loss": 0.5186, + "step": 12222 + }, + { + "epoch": 0.87, + "grad_norm": 1.8217472155595527, + "learning_rate": 4.542516858508339e-07, + "loss": 0.4889, + "step": 12223 + }, + { + "epoch": 0.87, + "grad_norm": 1.6975788259832736, + "learning_rate": 4.53773212860254e-07, + "loss": 0.5154, + "step": 12224 + }, + { + "epoch": 0.87, + "grad_norm": 1.7222590318789708, + "learning_rate": 4.5329498001677905e-07, + "loss": 0.5464, + "step": 12225 + }, + { + "epoch": 0.87, + "grad_norm": 1.8697217533368162, + "learning_rate": 4.5281698734566835e-07, + "loss": 0.535, + "step": 12226 + }, + { + "epoch": 0.87, + "grad_norm": 2.48609239735506, + "learning_rate": 4.5233923487217226e-07, + "loss": 0.5536, + "step": 12227 + }, + { + "epoch": 0.87, + "grad_norm": 1.5826131514665254, + "learning_rate": 4.518617226215277e-07, + "loss": 0.4956, + "step": 12228 + }, + { + "epoch": 0.87, + "grad_norm": 9.687318013996034, + "learning_rate": 4.5138445061895795e-07, + "loss": 0.5498, + "step": 12229 + }, + { + "epoch": 0.87, + "grad_norm": 1.8386186094074422, + "learning_rate": 4.509074188896739e-07, + "loss": 0.5727, + "step": 12230 + }, + { + "epoch": 0.87, + "grad_norm": 1.490911653614934, + "learning_rate": 4.504306274588749e-07, + "loss": 0.5204, + "step": 12231 + }, + { + "epoch": 0.87, + "grad_norm": 0.7023216089004408, + "learning_rate": 4.4995407635174736e-07, + "loss": 0.4229, + "step": 12232 + }, + { + "epoch": 0.87, + "grad_norm": 1.674072967666397, + "learning_rate": 4.494777655934618e-07, + "loss": 0.4756, + "step": 12233 + }, + { + "epoch": 0.87, + "grad_norm": 2.0797090444859876, + "learning_rate": 4.4900169520918026e-07, + "loss": 0.5856, + "step": 12234 + }, + { + "epoch": 0.87, + "grad_norm": 2.6762990884206603, + "learning_rate": 4.4852586522404984e-07, + "loss": 0.5065, + "step": 12235 + }, + { + "epoch": 0.87, + "grad_norm": 1.5668661230471939, + "learning_rate": 4.4805027566320545e-07, + "loss": 0.4956, + "step": 12236 + }, + { + "epoch": 0.87, + "grad_norm": 2.866681017111776, + "learning_rate": 4.475749265517709e-07, + "loss": 0.5285, + "step": 12237 + }, + { + "epoch": 0.87, + "grad_norm": 1.575889174278197, + "learning_rate": 4.470998179148528e-07, + "loss": 0.4711, + "step": 12238 + }, + { + "epoch": 0.87, + "grad_norm": 2.1998939538644966, + "learning_rate": 4.46624949777551e-07, + "loss": 0.622, + "step": 12239 + }, + { + "epoch": 0.87, + "grad_norm": 1.7718647567704433, + "learning_rate": 4.461503221649477e-07, + "loss": 0.5372, + "step": 12240 + }, + { + "epoch": 0.87, + "grad_norm": 1.5768677510647495, + "learning_rate": 4.4567593510211504e-07, + "loss": 0.5327, + "step": 12241 + }, + { + "epoch": 0.87, + "grad_norm": 1.5869406877975802, + "learning_rate": 4.4520178861411134e-07, + "loss": 0.4802, + "step": 12242 + }, + { + "epoch": 0.87, + "grad_norm": 1.8484366133985686, + "learning_rate": 4.4472788272598324e-07, + "loss": 0.5441, + "step": 12243 + }, + { + "epoch": 0.87, + "grad_norm": 2.2211458665442376, + "learning_rate": 4.4425421746276456e-07, + "loss": 0.4832, + "step": 12244 + }, + { + "epoch": 0.87, + "grad_norm": 1.886772703376908, + "learning_rate": 4.4378079284947307e-07, + "loss": 0.5145, + "step": 12245 + }, + { + "epoch": 0.87, + "grad_norm": 0.6417831384396389, + "learning_rate": 4.4330760891112044e-07, + "loss": 0.4342, + "step": 12246 + }, + { + "epoch": 0.87, + "grad_norm": 1.692950823141219, + "learning_rate": 4.428346656726995e-07, + "loss": 0.4681, + "step": 12247 + }, + { + "epoch": 0.87, + "grad_norm": 2.079898153823917, + "learning_rate": 4.42361963159193e-07, + "loss": 0.5173, + "step": 12248 + }, + { + "epoch": 0.87, + "grad_norm": 1.9526160834612902, + "learning_rate": 4.41889501395571e-07, + "loss": 0.5396, + "step": 12249 + }, + { + "epoch": 0.87, + "grad_norm": 1.8923973714122537, + "learning_rate": 4.4141728040679067e-07, + "loss": 0.4905, + "step": 12250 + }, + { + "epoch": 0.87, + "grad_norm": 1.9011127460172248, + "learning_rate": 4.4094530021779714e-07, + "loss": 0.5351, + "step": 12251 + }, + { + "epoch": 0.87, + "grad_norm": 1.5921535621640754, + "learning_rate": 4.4047356085352045e-07, + "loss": 0.5411, + "step": 12252 + }, + { + "epoch": 0.87, + "grad_norm": 1.689882143868564, + "learning_rate": 4.4000206233887955e-07, + "loss": 0.5313, + "step": 12253 + }, + { + "epoch": 0.87, + "grad_norm": 2.1397025032804633, + "learning_rate": 4.395308046987812e-07, + "loss": 0.5364, + "step": 12254 + }, + { + "epoch": 0.87, + "grad_norm": 1.63535928825643, + "learning_rate": 4.3905978795811934e-07, + "loss": 0.5691, + "step": 12255 + }, + { + "epoch": 0.87, + "grad_norm": 1.6835969788884702, + "learning_rate": 4.3858901214177354e-07, + "loss": 0.5039, + "step": 12256 + }, + { + "epoch": 0.87, + "grad_norm": 1.842927103005912, + "learning_rate": 4.3811847727461167e-07, + "loss": 0.5241, + "step": 12257 + }, + { + "epoch": 0.87, + "grad_norm": 1.6631032621437394, + "learning_rate": 4.3764818338149107e-07, + "loss": 0.5627, + "step": 12258 + }, + { + "epoch": 0.87, + "grad_norm": 1.6591317516484885, + "learning_rate": 4.371781304872513e-07, + "loss": 0.5228, + "step": 12259 + }, + { + "epoch": 0.87, + "grad_norm": 1.9465438148591414, + "learning_rate": 4.3670831861672536e-07, + "loss": 0.5361, + "step": 12260 + }, + { + "epoch": 0.87, + "grad_norm": 1.5162096590093637, + "learning_rate": 4.3623874779472665e-07, + "loss": 0.498, + "step": 12261 + }, + { + "epoch": 0.87, + "grad_norm": 1.6739803453075528, + "learning_rate": 4.357694180460631e-07, + "loss": 0.557, + "step": 12262 + }, + { + "epoch": 0.87, + "grad_norm": 1.5927258345142263, + "learning_rate": 4.3530032939552327e-07, + "loss": 0.4653, + "step": 12263 + }, + { + "epoch": 0.87, + "grad_norm": 0.6465400099571806, + "learning_rate": 4.348314818678878e-07, + "loss": 0.4095, + "step": 12264 + }, + { + "epoch": 0.87, + "grad_norm": 1.7275165048921362, + "learning_rate": 4.3436287548792256e-07, + "loss": 0.4957, + "step": 12265 + }, + { + "epoch": 0.87, + "grad_norm": 1.906791659489699, + "learning_rate": 4.3389451028038043e-07, + "loss": 0.5369, + "step": 12266 + }, + { + "epoch": 0.87, + "grad_norm": 2.0287484465906287, + "learning_rate": 4.3342638627000335e-07, + "loss": 0.553, + "step": 12267 + }, + { + "epoch": 0.87, + "grad_norm": 1.7400319804281201, + "learning_rate": 4.329585034815165e-07, + "loss": 0.5367, + "step": 12268 + }, + { + "epoch": 0.87, + "grad_norm": 1.6177641632109991, + "learning_rate": 4.324908619396384e-07, + "loss": 0.45, + "step": 12269 + }, + { + "epoch": 0.87, + "grad_norm": 1.4682015978453395, + "learning_rate": 4.320234616690688e-07, + "loss": 0.486, + "step": 12270 + }, + { + "epoch": 0.87, + "grad_norm": 1.7116653087007963, + "learning_rate": 4.315563026944985e-07, + "loss": 0.5244, + "step": 12271 + }, + { + "epoch": 0.87, + "grad_norm": 1.6287654375233782, + "learning_rate": 4.310893850406039e-07, + "loss": 0.5579, + "step": 12272 + }, + { + "epoch": 0.87, + "grad_norm": 1.8989764135222662, + "learning_rate": 4.3062270873205024e-07, + "loss": 0.4612, + "step": 12273 + }, + { + "epoch": 0.87, + "grad_norm": 1.5911339482131992, + "learning_rate": 4.301562737934889e-07, + "loss": 0.4887, + "step": 12274 + }, + { + "epoch": 0.87, + "grad_norm": 0.6612568152977363, + "learning_rate": 4.296900802495568e-07, + "loss": 0.4159, + "step": 12275 + }, + { + "epoch": 0.87, + "grad_norm": 1.7623336448041877, + "learning_rate": 4.29224128124881e-07, + "loss": 0.5346, + "step": 12276 + }, + { + "epoch": 0.87, + "grad_norm": 2.3374360666364997, + "learning_rate": 4.287584174440751e-07, + "loss": 0.5649, + "step": 12277 + }, + { + "epoch": 0.87, + "grad_norm": 1.9758723873980755, + "learning_rate": 4.282929482317388e-07, + "loss": 0.5562, + "step": 12278 + }, + { + "epoch": 0.87, + "grad_norm": 2.087429678777001, + "learning_rate": 4.278277205124598e-07, + "loss": 0.542, + "step": 12279 + }, + { + "epoch": 0.87, + "grad_norm": 1.785891089761743, + "learning_rate": 4.2736273431081387e-07, + "loss": 0.5064, + "step": 12280 + }, + { + "epoch": 0.87, + "grad_norm": 0.7248866277052052, + "learning_rate": 4.268979896513631e-07, + "loss": 0.4105, + "step": 12281 + }, + { + "epoch": 0.87, + "grad_norm": 1.6706680838494201, + "learning_rate": 4.26433486558655e-07, + "loss": 0.5291, + "step": 12282 + }, + { + "epoch": 0.87, + "grad_norm": 1.5806722391571353, + "learning_rate": 4.259692250572278e-07, + "loss": 0.4914, + "step": 12283 + }, + { + "epoch": 0.87, + "grad_norm": 1.9580104408239736, + "learning_rate": 4.255052051716052e-07, + "loss": 0.5083, + "step": 12284 + }, + { + "epoch": 0.87, + "grad_norm": 1.9737839122823915, + "learning_rate": 4.250414269262981e-07, + "loss": 0.6239, + "step": 12285 + }, + { + "epoch": 0.87, + "grad_norm": 2.207022811008975, + "learning_rate": 4.245778903458059e-07, + "loss": 0.4542, + "step": 12286 + }, + { + "epoch": 0.87, + "grad_norm": 1.6910806069778506, + "learning_rate": 4.241145954546111e-07, + "loss": 0.4907, + "step": 12287 + }, + { + "epoch": 0.87, + "grad_norm": 1.8890750790770736, + "learning_rate": 4.236515422771903e-07, + "loss": 0.496, + "step": 12288 + }, + { + "epoch": 0.87, + "grad_norm": 1.4520931225977063, + "learning_rate": 4.2318873083800116e-07, + "loss": 0.4742, + "step": 12289 + }, + { + "epoch": 0.87, + "grad_norm": 1.8167224705270189, + "learning_rate": 4.2272616116149247e-07, + "loss": 0.4729, + "step": 12290 + }, + { + "epoch": 0.87, + "grad_norm": 1.8709470370839907, + "learning_rate": 4.222638332720963e-07, + "loss": 0.5184, + "step": 12291 + }, + { + "epoch": 0.87, + "grad_norm": 1.8938117463681103, + "learning_rate": 4.218017471942376e-07, + "loss": 0.5748, + "step": 12292 + }, + { + "epoch": 0.87, + "grad_norm": 1.6356292894508706, + "learning_rate": 4.213399029523224e-07, + "loss": 0.5168, + "step": 12293 + }, + { + "epoch": 0.87, + "grad_norm": 1.8054411212679735, + "learning_rate": 4.208783005707484e-07, + "loss": 0.5293, + "step": 12294 + }, + { + "epoch": 0.87, + "grad_norm": 1.8252076467676241, + "learning_rate": 4.204169400738989e-07, + "loss": 0.5596, + "step": 12295 + }, + { + "epoch": 0.87, + "grad_norm": 2.3584863405776257, + "learning_rate": 4.1995582148614433e-07, + "loss": 0.5066, + "step": 12296 + }, + { + "epoch": 0.87, + "grad_norm": 2.2935675552810766, + "learning_rate": 4.1949494483184363e-07, + "loss": 0.4997, + "step": 12297 + }, + { + "epoch": 0.87, + "grad_norm": 1.9805818345796364, + "learning_rate": 4.190343101353389e-07, + "loss": 0.586, + "step": 12298 + }, + { + "epoch": 0.87, + "grad_norm": 1.7307053773846368, + "learning_rate": 4.185739174209658e-07, + "loss": 0.5004, + "step": 12299 + }, + { + "epoch": 0.87, + "grad_norm": 1.7025069588976744, + "learning_rate": 4.1811376671304195e-07, + "loss": 0.5105, + "step": 12300 + }, + { + "epoch": 0.87, + "grad_norm": 0.6912325697982562, + "learning_rate": 4.176538580358741e-07, + "loss": 0.4124, + "step": 12301 + }, + { + "epoch": 0.87, + "grad_norm": 2.2755802706622754, + "learning_rate": 4.171941914137567e-07, + "loss": 0.5517, + "step": 12302 + }, + { + "epoch": 0.87, + "grad_norm": 2.1237433499663783, + "learning_rate": 4.167347668709709e-07, + "loss": 0.4921, + "step": 12303 + }, + { + "epoch": 0.87, + "grad_norm": 2.1514041190081286, + "learning_rate": 4.162755844317856e-07, + "loss": 0.5254, + "step": 12304 + }, + { + "epoch": 0.87, + "grad_norm": 2.0086974201144505, + "learning_rate": 4.1581664412045473e-07, + "loss": 0.4822, + "step": 12305 + }, + { + "epoch": 0.87, + "grad_norm": 4.464369031659622, + "learning_rate": 4.1535794596122224e-07, + "loss": 0.4892, + "step": 12306 + }, + { + "epoch": 0.87, + "grad_norm": 1.6954258560785411, + "learning_rate": 4.1489948997831817e-07, + "loss": 0.4815, + "step": 12307 + }, + { + "epoch": 0.87, + "grad_norm": 2.3283447605092906, + "learning_rate": 4.144412761959593e-07, + "loss": 0.5468, + "step": 12308 + }, + { + "epoch": 0.87, + "grad_norm": 1.7100903620642396, + "learning_rate": 4.139833046383512e-07, + "loss": 0.4748, + "step": 12309 + }, + { + "epoch": 0.87, + "grad_norm": 0.6934287379856986, + "learning_rate": 4.135255753296824e-07, + "loss": 0.4439, + "step": 12310 + }, + { + "epoch": 0.87, + "grad_norm": 1.6186559646451069, + "learning_rate": 4.130680882941357e-07, + "loss": 0.5427, + "step": 12311 + }, + { + "epoch": 0.87, + "grad_norm": 2.2235112302383553, + "learning_rate": 4.1261084355587456e-07, + "loss": 0.5495, + "step": 12312 + }, + { + "epoch": 0.87, + "grad_norm": 1.8935765637379058, + "learning_rate": 4.1215384113905243e-07, + "loss": 0.4948, + "step": 12313 + }, + { + "epoch": 0.87, + "grad_norm": 1.6105856625539303, + "learning_rate": 4.116970810678106e-07, + "loss": 0.5256, + "step": 12314 + }, + { + "epoch": 0.87, + "grad_norm": 1.7277794367597874, + "learning_rate": 4.1124056336627637e-07, + "loss": 0.4926, + "step": 12315 + }, + { + "epoch": 0.87, + "grad_norm": 1.5711753052031339, + "learning_rate": 4.1078428805856494e-07, + "loss": 0.5224, + "step": 12316 + }, + { + "epoch": 0.87, + "grad_norm": 1.8779856758083189, + "learning_rate": 4.1032825516877594e-07, + "loss": 0.4823, + "step": 12317 + }, + { + "epoch": 0.87, + "grad_norm": 1.462142314539904, + "learning_rate": 4.098724647210023e-07, + "loss": 0.444, + "step": 12318 + }, + { + "epoch": 0.87, + "grad_norm": 1.5439116639856993, + "learning_rate": 4.0941691673931805e-07, + "loss": 0.5235, + "step": 12319 + }, + { + "epoch": 0.87, + "grad_norm": 1.7193245307132088, + "learning_rate": 4.089616112477873e-07, + "loss": 0.5443, + "step": 12320 + }, + { + "epoch": 0.87, + "grad_norm": 2.4919994933615004, + "learning_rate": 4.0850654827045976e-07, + "loss": 0.4858, + "step": 12321 + }, + { + "epoch": 0.87, + "grad_norm": 1.750670013964222, + "learning_rate": 4.0805172783137615e-07, + "loss": 0.548, + "step": 12322 + }, + { + "epoch": 0.87, + "grad_norm": 1.6649224801048146, + "learning_rate": 4.075971499545589e-07, + "loss": 0.4979, + "step": 12323 + }, + { + "epoch": 0.87, + "grad_norm": 1.5990137799288404, + "learning_rate": 4.071428146640216e-07, + "loss": 0.5414, + "step": 12324 + }, + { + "epoch": 0.87, + "grad_norm": 1.5455490444534017, + "learning_rate": 4.066887219837629e-07, + "loss": 0.4763, + "step": 12325 + }, + { + "epoch": 0.87, + "grad_norm": 1.658673562007029, + "learning_rate": 4.0623487193777066e-07, + "loss": 0.4881, + "step": 12326 + }, + { + "epoch": 0.87, + "grad_norm": 1.6995465491355064, + "learning_rate": 4.057812645500192e-07, + "loss": 0.518, + "step": 12327 + }, + { + "epoch": 0.87, + "grad_norm": 0.6560348724599131, + "learning_rate": 4.0532789984446707e-07, + "loss": 0.4032, + "step": 12328 + }, + { + "epoch": 0.87, + "grad_norm": 1.877197611851411, + "learning_rate": 4.048747778450657e-07, + "loss": 0.4528, + "step": 12329 + }, + { + "epoch": 0.87, + "grad_norm": 2.1616460892038902, + "learning_rate": 4.044218985757481e-07, + "loss": 0.5265, + "step": 12330 + }, + { + "epoch": 0.88, + "grad_norm": 1.682650648601404, + "learning_rate": 4.039692620604374e-07, + "loss": 0.5066, + "step": 12331 + }, + { + "epoch": 0.88, + "grad_norm": 1.9878494950659022, + "learning_rate": 4.0351686832304447e-07, + "loss": 0.5256, + "step": 12332 + }, + { + "epoch": 0.88, + "grad_norm": 2.04621093817879, + "learning_rate": 4.0306471738746466e-07, + "loss": 0.4901, + "step": 12333 + }, + { + "epoch": 0.88, + "grad_norm": 1.7178782599220441, + "learning_rate": 4.0261280927758493e-07, + "loss": 0.4667, + "step": 12334 + }, + { + "epoch": 0.88, + "grad_norm": 1.5487204373965842, + "learning_rate": 4.021611440172729e-07, + "loss": 0.5116, + "step": 12335 + }, + { + "epoch": 0.88, + "grad_norm": 1.8929941708512685, + "learning_rate": 4.0170972163038946e-07, + "loss": 0.5034, + "step": 12336 + }, + { + "epoch": 0.88, + "grad_norm": 0.6810709474104447, + "learning_rate": 4.012585421407794e-07, + "loss": 0.4273, + "step": 12337 + }, + { + "epoch": 0.88, + "grad_norm": 1.7406537935740145, + "learning_rate": 4.0080760557227594e-07, + "loss": 0.5419, + "step": 12338 + }, + { + "epoch": 0.88, + "grad_norm": 1.5179733566998834, + "learning_rate": 4.0035691194869944e-07, + "loss": 0.4991, + "step": 12339 + }, + { + "epoch": 0.88, + "grad_norm": 2.0735403896018023, + "learning_rate": 3.999064612938552e-07, + "loss": 0.5039, + "step": 12340 + }, + { + "epoch": 0.88, + "grad_norm": 1.5607821889608993, + "learning_rate": 3.994562536315405e-07, + "loss": 0.5268, + "step": 12341 + }, + { + "epoch": 0.88, + "grad_norm": 1.4607944741470478, + "learning_rate": 3.990062889855345e-07, + "loss": 0.5141, + "step": 12342 + }, + { + "epoch": 0.88, + "grad_norm": 0.6663512421304779, + "learning_rate": 3.985565673796066e-07, + "loss": 0.4145, + "step": 12343 + }, + { + "epoch": 0.88, + "grad_norm": 1.677430124330887, + "learning_rate": 3.981070888375127e-07, + "loss": 0.5154, + "step": 12344 + }, + { + "epoch": 0.88, + "grad_norm": 2.450724520146816, + "learning_rate": 3.9765785338299555e-07, + "loss": 0.4993, + "step": 12345 + }, + { + "epoch": 0.88, + "grad_norm": 2.034613627351141, + "learning_rate": 3.972088610397867e-07, + "loss": 0.4773, + "step": 12346 + }, + { + "epoch": 0.88, + "grad_norm": 1.5410983761597516, + "learning_rate": 3.967601118316e-07, + "loss": 0.5031, + "step": 12347 + }, + { + "epoch": 0.88, + "grad_norm": 1.797640787687162, + "learning_rate": 3.963116057821437e-07, + "loss": 0.5219, + "step": 12348 + }, + { + "epoch": 0.88, + "grad_norm": 1.878816099283627, + "learning_rate": 3.9586334291510766e-07, + "loss": 0.5001, + "step": 12349 + }, + { + "epoch": 0.88, + "grad_norm": 1.7339217593321463, + "learning_rate": 3.9541532325417077e-07, + "loss": 0.5753, + "step": 12350 + }, + { + "epoch": 0.88, + "grad_norm": 1.676553841615516, + "learning_rate": 3.94967546822998e-07, + "loss": 0.4804, + "step": 12351 + }, + { + "epoch": 0.88, + "grad_norm": 2.2676220256648456, + "learning_rate": 3.945200136452437e-07, + "loss": 0.516, + "step": 12352 + }, + { + "epoch": 0.88, + "grad_norm": 1.6343944176420924, + "learning_rate": 3.9407272374454907e-07, + "loss": 0.4806, + "step": 12353 + }, + { + "epoch": 0.88, + "grad_norm": 1.79163437445112, + "learning_rate": 3.93625677144539e-07, + "loss": 0.4491, + "step": 12354 + }, + { + "epoch": 0.88, + "grad_norm": 2.454971829469113, + "learning_rate": 3.9317887386882914e-07, + "loss": 0.5025, + "step": 12355 + }, + { + "epoch": 0.88, + "grad_norm": 6.7381801972971225, + "learning_rate": 3.9273231394102165e-07, + "loss": 0.5194, + "step": 12356 + }, + { + "epoch": 0.88, + "grad_norm": 0.6835995843702123, + "learning_rate": 3.922859973847054e-07, + "loss": 0.4325, + "step": 12357 + }, + { + "epoch": 0.88, + "grad_norm": 1.8191071197350734, + "learning_rate": 3.918399242234544e-07, + "loss": 0.4665, + "step": 12358 + }, + { + "epoch": 0.88, + "grad_norm": 9.739378104672744, + "learning_rate": 3.9139409448083423e-07, + "loss": 0.5659, + "step": 12359 + }, + { + "epoch": 0.88, + "grad_norm": 1.9453520806401217, + "learning_rate": 3.9094850818039375e-07, + "loss": 0.5744, + "step": 12360 + }, + { + "epoch": 0.88, + "grad_norm": 1.8102387251044818, + "learning_rate": 3.905031653456709e-07, + "loss": 0.4996, + "step": 12361 + }, + { + "epoch": 0.88, + "grad_norm": 1.7246932910855062, + "learning_rate": 3.900580660001896e-07, + "loss": 0.4998, + "step": 12362 + }, + { + "epoch": 0.88, + "grad_norm": 1.761760579505079, + "learning_rate": 3.8961321016746154e-07, + "loss": 0.5111, + "step": 12363 + }, + { + "epoch": 0.88, + "grad_norm": 1.871385728970166, + "learning_rate": 3.891685978709875e-07, + "loss": 0.4967, + "step": 12364 + }, + { + "epoch": 0.88, + "grad_norm": 1.7244918727396341, + "learning_rate": 3.887242291342502e-07, + "loss": 0.5244, + "step": 12365 + }, + { + "epoch": 0.88, + "grad_norm": 1.853364956467938, + "learning_rate": 3.882801039807249e-07, + "loss": 0.4808, + "step": 12366 + }, + { + "epoch": 0.88, + "grad_norm": 1.5377428896429148, + "learning_rate": 3.878362224338705e-07, + "loss": 0.4386, + "step": 12367 + }, + { + "epoch": 0.88, + "grad_norm": 2.020381619343402, + "learning_rate": 3.8739258451713503e-07, + "loss": 0.5171, + "step": 12368 + }, + { + "epoch": 0.88, + "grad_norm": 1.5360068628012573, + "learning_rate": 3.8694919025395406e-07, + "loss": 0.5517, + "step": 12369 + }, + { + "epoch": 0.88, + "grad_norm": 1.8456188830840825, + "learning_rate": 3.8650603966774616e-07, + "loss": 0.4967, + "step": 12370 + }, + { + "epoch": 0.88, + "grad_norm": 1.6692377635423776, + "learning_rate": 3.860631327819236e-07, + "loss": 0.5351, + "step": 12371 + }, + { + "epoch": 0.88, + "grad_norm": 1.5362116013174432, + "learning_rate": 3.856204696198801e-07, + "loss": 0.5051, + "step": 12372 + }, + { + "epoch": 0.88, + "grad_norm": 1.5957596664982405, + "learning_rate": 3.851780502049984e-07, + "loss": 0.509, + "step": 12373 + }, + { + "epoch": 0.88, + "grad_norm": 0.6793994194976533, + "learning_rate": 3.847358745606494e-07, + "loss": 0.4167, + "step": 12374 + }, + { + "epoch": 0.88, + "grad_norm": 1.8869545861724444, + "learning_rate": 3.842939427101905e-07, + "loss": 0.5673, + "step": 12375 + }, + { + "epoch": 0.88, + "grad_norm": 1.8232148487435595, + "learning_rate": 3.838522546769663e-07, + "loss": 0.5173, + "step": 12376 + }, + { + "epoch": 0.88, + "grad_norm": 2.4242606864126355, + "learning_rate": 3.8341081048430595e-07, + "loss": 0.4875, + "step": 12377 + }, + { + "epoch": 0.88, + "grad_norm": 1.6861368596826092, + "learning_rate": 3.8296961015553135e-07, + "loss": 0.5315, + "step": 12378 + }, + { + "epoch": 0.88, + "grad_norm": 1.685632591992498, + "learning_rate": 3.825286537139461e-07, + "loss": 0.4667, + "step": 12379 + }, + { + "epoch": 0.88, + "grad_norm": 0.7371131282537533, + "learning_rate": 3.820879411828432e-07, + "loss": 0.4462, + "step": 12380 + }, + { + "epoch": 0.88, + "grad_norm": 0.7428880403741701, + "learning_rate": 3.816474725855029e-07, + "loss": 0.4357, + "step": 12381 + }, + { + "epoch": 0.88, + "grad_norm": 1.7594881645476148, + "learning_rate": 3.812072479451917e-07, + "loss": 0.5943, + "step": 12382 + }, + { + "epoch": 0.88, + "grad_norm": 0.694971112377807, + "learning_rate": 3.807672672851659e-07, + "loss": 0.4031, + "step": 12383 + }, + { + "epoch": 0.88, + "grad_norm": 0.6452036034650921, + "learning_rate": 3.803275306286641e-07, + "loss": 0.4143, + "step": 12384 + }, + { + "epoch": 0.88, + "grad_norm": 1.6953471493992103, + "learning_rate": 3.798880379989156e-07, + "loss": 0.5851, + "step": 12385 + }, + { + "epoch": 0.88, + "grad_norm": 1.8357923925893973, + "learning_rate": 3.7944878941913565e-07, + "loss": 0.5174, + "step": 12386 + }, + { + "epoch": 0.88, + "grad_norm": 1.8087467135469366, + "learning_rate": 3.790097849125285e-07, + "loss": 0.5639, + "step": 12387 + }, + { + "epoch": 0.88, + "grad_norm": 1.8112305733299847, + "learning_rate": 3.785710245022817e-07, + "loss": 0.4674, + "step": 12388 + }, + { + "epoch": 0.88, + "grad_norm": 1.6657893430709256, + "learning_rate": 3.781325082115722e-07, + "loss": 0.4961, + "step": 12389 + }, + { + "epoch": 0.88, + "grad_norm": 2.200450011566859, + "learning_rate": 3.77694236063566e-07, + "loss": 0.4959, + "step": 12390 + }, + { + "epoch": 0.88, + "grad_norm": 1.5768621263312423, + "learning_rate": 3.772562080814124e-07, + "loss": 0.4773, + "step": 12391 + }, + { + "epoch": 0.88, + "grad_norm": 2.1351162564871666, + "learning_rate": 3.7681842428824945e-07, + "loss": 0.4741, + "step": 12392 + }, + { + "epoch": 0.88, + "grad_norm": 0.6545433999582903, + "learning_rate": 3.763808847072026e-07, + "loss": 0.4378, + "step": 12393 + }, + { + "epoch": 0.88, + "grad_norm": 1.4586527030671088, + "learning_rate": 3.759435893613855e-07, + "loss": 0.4448, + "step": 12394 + }, + { + "epoch": 0.88, + "grad_norm": 1.6488007271663436, + "learning_rate": 3.7550653827389593e-07, + "loss": 0.5407, + "step": 12395 + }, + { + "epoch": 0.88, + "grad_norm": 1.78614659047331, + "learning_rate": 3.7506973146782033e-07, + "loss": 0.5174, + "step": 12396 + }, + { + "epoch": 0.88, + "grad_norm": 1.5869179742485857, + "learning_rate": 3.746331689662336e-07, + "loss": 0.4909, + "step": 12397 + }, + { + "epoch": 0.88, + "grad_norm": 0.7167899871569551, + "learning_rate": 3.7419685079219504e-07, + "loss": 0.4332, + "step": 12398 + }, + { + "epoch": 0.88, + "grad_norm": 1.7325139187670855, + "learning_rate": 3.737607769687546e-07, + "loss": 0.5119, + "step": 12399 + }, + { + "epoch": 0.88, + "grad_norm": 1.518656376330653, + "learning_rate": 3.7332494751894374e-07, + "loss": 0.4998, + "step": 12400 + }, + { + "epoch": 0.88, + "grad_norm": 4.5839432087451755, + "learning_rate": 3.728893624657881e-07, + "loss": 0.5029, + "step": 12401 + }, + { + "epoch": 0.88, + "grad_norm": 1.7491849170824432, + "learning_rate": 3.7245402183229474e-07, + "loss": 0.5084, + "step": 12402 + }, + { + "epoch": 0.88, + "grad_norm": 1.6118194449607304, + "learning_rate": 3.720189256414597e-07, + "loss": 0.4714, + "step": 12403 + }, + { + "epoch": 0.88, + "grad_norm": 2.012701586294835, + "learning_rate": 3.7158407391626683e-07, + "loss": 0.4573, + "step": 12404 + }, + { + "epoch": 0.88, + "grad_norm": 1.8274193145480169, + "learning_rate": 3.7114946667968667e-07, + "loss": 0.5245, + "step": 12405 + }, + { + "epoch": 0.88, + "grad_norm": 2.3052606094070516, + "learning_rate": 3.707151039546775e-07, + "loss": 0.5085, + "step": 12406 + }, + { + "epoch": 0.88, + "grad_norm": 4.339859599357677, + "learning_rate": 3.7028098576418034e-07, + "loss": 0.5651, + "step": 12407 + }, + { + "epoch": 0.88, + "grad_norm": 2.0773581227143127, + "learning_rate": 3.698471121311309e-07, + "loss": 0.4958, + "step": 12408 + }, + { + "epoch": 0.88, + "grad_norm": 1.6926047370164214, + "learning_rate": 3.694134830784457e-07, + "loss": 0.5011, + "step": 12409 + }, + { + "epoch": 0.88, + "grad_norm": 1.8423370970370172, + "learning_rate": 3.689800986290304e-07, + "loss": 0.5254, + "step": 12410 + }, + { + "epoch": 0.88, + "grad_norm": 1.7719328847206304, + "learning_rate": 3.685469588057783e-07, + "loss": 0.5067, + "step": 12411 + }, + { + "epoch": 0.88, + "grad_norm": 2.6132454858073855, + "learning_rate": 3.6811406363157e-07, + "loss": 0.4909, + "step": 12412 + }, + { + "epoch": 0.88, + "grad_norm": 1.497438205349686, + "learning_rate": 3.6768141312927217e-07, + "loss": 0.5268, + "step": 12413 + }, + { + "epoch": 0.88, + "grad_norm": 1.6179937478298771, + "learning_rate": 3.6724900732173774e-07, + "loss": 0.5764, + "step": 12414 + }, + { + "epoch": 0.88, + "grad_norm": 2.27646366200564, + "learning_rate": 3.6681684623180893e-07, + "loss": 0.4827, + "step": 12415 + }, + { + "epoch": 0.88, + "grad_norm": 1.6314592158727765, + "learning_rate": 3.66384929882313e-07, + "loss": 0.599, + "step": 12416 + }, + { + "epoch": 0.88, + "grad_norm": 1.5833870067193596, + "learning_rate": 3.659532582960662e-07, + "loss": 0.4617, + "step": 12417 + }, + { + "epoch": 0.88, + "grad_norm": 1.5874451825825013, + "learning_rate": 3.6552183149587196e-07, + "loss": 0.556, + "step": 12418 + }, + { + "epoch": 0.88, + "grad_norm": 1.5596956806720983, + "learning_rate": 3.6509064950451587e-07, + "loss": 0.5315, + "step": 12419 + }, + { + "epoch": 0.88, + "grad_norm": 1.5213866620027063, + "learning_rate": 3.6465971234477925e-07, + "loss": 0.5538, + "step": 12420 + }, + { + "epoch": 0.88, + "grad_norm": 2.9797191332486643, + "learning_rate": 3.642290200394222e-07, + "loss": 0.5342, + "step": 12421 + }, + { + "epoch": 0.88, + "grad_norm": 0.7346952333267825, + "learning_rate": 3.6379857261119645e-07, + "loss": 0.4166, + "step": 12422 + }, + { + "epoch": 0.88, + "grad_norm": 1.4814648298685746, + "learning_rate": 3.633683700828394e-07, + "loss": 0.5038, + "step": 12423 + }, + { + "epoch": 0.88, + "grad_norm": 1.668279687834836, + "learning_rate": 3.629384124770774e-07, + "loss": 0.5282, + "step": 12424 + }, + { + "epoch": 0.88, + "grad_norm": 1.5451790286500853, + "learning_rate": 3.6250869981661994e-07, + "loss": 0.4987, + "step": 12425 + }, + { + "epoch": 0.88, + "grad_norm": 1.6978142515404004, + "learning_rate": 3.620792321241667e-07, + "loss": 0.5147, + "step": 12426 + }, + { + "epoch": 0.88, + "grad_norm": 1.723486978535732, + "learning_rate": 3.6165000942240405e-07, + "loss": 0.5125, + "step": 12427 + }, + { + "epoch": 0.88, + "grad_norm": 1.8489426918227718, + "learning_rate": 3.612210317340048e-07, + "loss": 0.5249, + "step": 12428 + }, + { + "epoch": 0.88, + "grad_norm": 1.696464781440644, + "learning_rate": 3.607922990816298e-07, + "loss": 0.5282, + "step": 12429 + }, + { + "epoch": 0.88, + "grad_norm": 2.34639285998573, + "learning_rate": 3.603638114879238e-07, + "loss": 0.4983, + "step": 12430 + }, + { + "epoch": 0.88, + "grad_norm": 0.6785922916666683, + "learning_rate": 3.599355689755241e-07, + "loss": 0.4287, + "step": 12431 + }, + { + "epoch": 0.88, + "grad_norm": 2.0757421781106697, + "learning_rate": 3.595075715670493e-07, + "loss": 0.5116, + "step": 12432 + }, + { + "epoch": 0.88, + "grad_norm": 0.6019605382837037, + "learning_rate": 3.590798192851086e-07, + "loss": 0.4026, + "step": 12433 + }, + { + "epoch": 0.88, + "grad_norm": 1.7879294373156671, + "learning_rate": 3.5865231215229776e-07, + "loss": 0.4965, + "step": 12434 + }, + { + "epoch": 0.88, + "grad_norm": 1.7388662738236278, + "learning_rate": 3.582250501911988e-07, + "loss": 0.5693, + "step": 12435 + }, + { + "epoch": 0.88, + "grad_norm": 2.0380334737055747, + "learning_rate": 3.5779803342438193e-07, + "loss": 0.4732, + "step": 12436 + }, + { + "epoch": 0.88, + "grad_norm": 1.6636485703583506, + "learning_rate": 3.5737126187440185e-07, + "loss": 0.4671, + "step": 12437 + }, + { + "epoch": 0.88, + "grad_norm": 2.3679527225201427, + "learning_rate": 3.569447355638034e-07, + "loss": 0.4821, + "step": 12438 + }, + { + "epoch": 0.88, + "grad_norm": 1.7588955687651104, + "learning_rate": 3.565184545151168e-07, + "loss": 0.5487, + "step": 12439 + }, + { + "epoch": 0.88, + "grad_norm": 1.5323530818434774, + "learning_rate": 3.5609241875085965e-07, + "loss": 0.4879, + "step": 12440 + }, + { + "epoch": 0.88, + "grad_norm": 1.6351755055533477, + "learning_rate": 3.5566662829353615e-07, + "loss": 0.487, + "step": 12441 + }, + { + "epoch": 0.88, + "grad_norm": 2.0784736969493514, + "learning_rate": 3.552410831656394e-07, + "loss": 0.5075, + "step": 12442 + }, + { + "epoch": 0.88, + "grad_norm": 2.0589603643386694, + "learning_rate": 3.548157833896476e-07, + "loss": 0.4581, + "step": 12443 + }, + { + "epoch": 0.88, + "grad_norm": 2.139761427440226, + "learning_rate": 3.5439072898802495e-07, + "loss": 0.5532, + "step": 12444 + }, + { + "epoch": 0.88, + "grad_norm": 1.7699920421652853, + "learning_rate": 3.5396591998322574e-07, + "loss": 0.5323, + "step": 12445 + }, + { + "epoch": 0.88, + "grad_norm": 1.8319637874433377, + "learning_rate": 3.535413563976897e-07, + "loss": 0.5366, + "step": 12446 + }, + { + "epoch": 0.88, + "grad_norm": 1.752406238242355, + "learning_rate": 3.5311703825384347e-07, + "loss": 0.5967, + "step": 12447 + }, + { + "epoch": 0.88, + "grad_norm": 2.6152648887778565, + "learning_rate": 3.526929655741024e-07, + "loss": 0.4809, + "step": 12448 + }, + { + "epoch": 0.88, + "grad_norm": 2.126123427340597, + "learning_rate": 3.522691383808635e-07, + "loss": 0.4712, + "step": 12449 + }, + { + "epoch": 0.88, + "grad_norm": 1.8321299088960519, + "learning_rate": 3.518455566965195e-07, + "loss": 0.5394, + "step": 12450 + }, + { + "epoch": 0.88, + "grad_norm": 1.7009862437164112, + "learning_rate": 3.5142222054344253e-07, + "loss": 0.4966, + "step": 12451 + }, + { + "epoch": 0.88, + "grad_norm": 0.6863300546085954, + "learning_rate": 3.509991299439963e-07, + "loss": 0.4145, + "step": 12452 + }, + { + "epoch": 0.88, + "grad_norm": 2.587300818150453, + "learning_rate": 3.5057628492052743e-07, + "loss": 0.5777, + "step": 12453 + }, + { + "epoch": 0.88, + "grad_norm": 1.548722795555203, + "learning_rate": 3.501536854953752e-07, + "loss": 0.468, + "step": 12454 + }, + { + "epoch": 0.88, + "grad_norm": 2.726629852185792, + "learning_rate": 3.4973133169086014e-07, + "loss": 0.511, + "step": 12455 + }, + { + "epoch": 0.88, + "grad_norm": 1.6378154859726575, + "learning_rate": 3.493092235292922e-07, + "loss": 0.5101, + "step": 12456 + }, + { + "epoch": 0.88, + "grad_norm": 0.6626688404897362, + "learning_rate": 3.488873610329718e-07, + "loss": 0.4165, + "step": 12457 + }, + { + "epoch": 0.88, + "grad_norm": 1.6618381522316965, + "learning_rate": 3.484657442241807e-07, + "loss": 0.5403, + "step": 12458 + }, + { + "epoch": 0.88, + "grad_norm": 1.7796960568769282, + "learning_rate": 3.480443731251909e-07, + "loss": 0.5159, + "step": 12459 + }, + { + "epoch": 0.88, + "grad_norm": 1.755099758067953, + "learning_rate": 3.476232477582586e-07, + "loss": 0.5671, + "step": 12460 + }, + { + "epoch": 0.88, + "grad_norm": 1.5727271177203757, + "learning_rate": 3.472023681456321e-07, + "loss": 0.5142, + "step": 12461 + }, + { + "epoch": 0.88, + "grad_norm": 1.4914597753875545, + "learning_rate": 3.4678173430954197e-07, + "loss": 0.4437, + "step": 12462 + }, + { + "epoch": 0.88, + "grad_norm": 1.7877285138940051, + "learning_rate": 3.4636134627220817e-07, + "loss": 0.5335, + "step": 12463 + }, + { + "epoch": 0.88, + "grad_norm": 1.7412223351900527, + "learning_rate": 3.459412040558363e-07, + "loss": 0.5258, + "step": 12464 + }, + { + "epoch": 0.88, + "grad_norm": 1.7676650122398736, + "learning_rate": 3.4552130768262027e-07, + "loss": 0.5663, + "step": 12465 + }, + { + "epoch": 0.88, + "grad_norm": 2.5247326306134914, + "learning_rate": 3.451016571747412e-07, + "loss": 0.4842, + "step": 12466 + }, + { + "epoch": 0.88, + "grad_norm": 1.6785787388521347, + "learning_rate": 3.446822525543647e-07, + "loss": 0.4747, + "step": 12467 + }, + { + "epoch": 0.88, + "grad_norm": 1.8362940892368056, + "learning_rate": 3.4426309384364586e-07, + "loss": 0.5535, + "step": 12468 + }, + { + "epoch": 0.88, + "grad_norm": 1.6958188642803864, + "learning_rate": 3.438441810647258e-07, + "loss": 0.5281, + "step": 12469 + }, + { + "epoch": 0.88, + "grad_norm": 1.8095019597230562, + "learning_rate": 3.434255142397341e-07, + "loss": 0.5509, + "step": 12470 + }, + { + "epoch": 0.88, + "grad_norm": 1.7017073577840283, + "learning_rate": 3.4300709339078473e-07, + "loss": 0.5614, + "step": 12471 + }, + { + "epoch": 0.89, + "grad_norm": 1.8362911467089231, + "learning_rate": 3.425889185399811e-07, + "loss": 0.5584, + "step": 12472 + }, + { + "epoch": 0.89, + "grad_norm": 1.8493286477648103, + "learning_rate": 3.4217098970941274e-07, + "loss": 0.5363, + "step": 12473 + }, + { + "epoch": 0.89, + "grad_norm": 1.8083878695632642, + "learning_rate": 3.417533069211554e-07, + "loss": 0.5438, + "step": 12474 + }, + { + "epoch": 0.89, + "grad_norm": 1.8030305332635188, + "learning_rate": 3.4133587019727245e-07, + "loss": 0.4956, + "step": 12475 + }, + { + "epoch": 0.89, + "grad_norm": 1.5551236268686666, + "learning_rate": 3.4091867955981406e-07, + "loss": 0.4202, + "step": 12476 + }, + { + "epoch": 0.89, + "grad_norm": 2.3524658390371815, + "learning_rate": 3.405017350308187e-07, + "loss": 0.5679, + "step": 12477 + }, + { + "epoch": 0.89, + "grad_norm": 0.6183407373668105, + "learning_rate": 3.4008503663231053e-07, + "loss": 0.4152, + "step": 12478 + }, + { + "epoch": 0.89, + "grad_norm": 2.121337402537972, + "learning_rate": 3.3966858438629956e-07, + "loss": 0.4785, + "step": 12479 + }, + { + "epoch": 0.89, + "grad_norm": 1.7733972128270377, + "learning_rate": 3.3925237831478663e-07, + "loss": 0.4241, + "step": 12480 + }, + { + "epoch": 0.89, + "grad_norm": 0.7057561401574279, + "learning_rate": 3.388364184397552e-07, + "loss": 0.4483, + "step": 12481 + }, + { + "epoch": 0.89, + "grad_norm": 1.5171669463785225, + "learning_rate": 3.384207047831789e-07, + "loss": 0.4581, + "step": 12482 + }, + { + "epoch": 0.89, + "grad_norm": 1.8045629364767846, + "learning_rate": 3.3800523736701506e-07, + "loss": 0.483, + "step": 12483 + }, + { + "epoch": 0.89, + "grad_norm": 1.8337528399994962, + "learning_rate": 3.3759001621321233e-07, + "loss": 0.5042, + "step": 12484 + }, + { + "epoch": 0.89, + "grad_norm": 0.7194773878095185, + "learning_rate": 3.371750413437042e-07, + "loss": 0.4325, + "step": 12485 + }, + { + "epoch": 0.89, + "grad_norm": 2.002345823706235, + "learning_rate": 3.3676031278040864e-07, + "loss": 0.5186, + "step": 12486 + }, + { + "epoch": 0.89, + "grad_norm": 1.8767107949229984, + "learning_rate": 3.3634583054523604e-07, + "loss": 0.4861, + "step": 12487 + }, + { + "epoch": 0.89, + "grad_norm": 1.7511657216123926, + "learning_rate": 3.3593159466007883e-07, + "loss": 0.5702, + "step": 12488 + }, + { + "epoch": 0.89, + "grad_norm": 1.3964660245304514, + "learning_rate": 3.355176051468195e-07, + "loss": 0.4047, + "step": 12489 + }, + { + "epoch": 0.89, + "grad_norm": 1.719904778481833, + "learning_rate": 3.351038620273239e-07, + "loss": 0.5393, + "step": 12490 + }, + { + "epoch": 0.89, + "grad_norm": 1.904003857101435, + "learning_rate": 3.346903653234507e-07, + "loss": 0.4925, + "step": 12491 + }, + { + "epoch": 0.89, + "grad_norm": 1.6732667012662514, + "learning_rate": 3.3427711505704006e-07, + "loss": 0.4879, + "step": 12492 + }, + { + "epoch": 0.89, + "grad_norm": 1.9238664209323715, + "learning_rate": 3.338641112499219e-07, + "loss": 0.4819, + "step": 12493 + }, + { + "epoch": 0.89, + "grad_norm": 2.2108035981828085, + "learning_rate": 3.3345135392391204e-07, + "loss": 0.5202, + "step": 12494 + }, + { + "epoch": 0.89, + "grad_norm": 1.7747833940273126, + "learning_rate": 3.3303884310081413e-07, + "loss": 0.5157, + "step": 12495 + }, + { + "epoch": 0.89, + "grad_norm": 1.93839944723024, + "learning_rate": 3.326265788024197e-07, + "loss": 0.4953, + "step": 12496 + }, + { + "epoch": 0.89, + "grad_norm": 1.794718864034495, + "learning_rate": 3.3221456105050287e-07, + "loss": 0.4592, + "step": 12497 + }, + { + "epoch": 0.89, + "grad_norm": 1.647197066179309, + "learning_rate": 3.318027898668302e-07, + "loss": 0.5437, + "step": 12498 + }, + { + "epoch": 0.89, + "grad_norm": 2.0725523273814197, + "learning_rate": 3.313912652731521e-07, + "loss": 0.5501, + "step": 12499 + }, + { + "epoch": 0.89, + "grad_norm": 1.5942693681007043, + "learning_rate": 3.309799872912067e-07, + "loss": 0.4966, + "step": 12500 + }, + { + "epoch": 0.89, + "grad_norm": 0.7047569986918572, + "learning_rate": 3.3056895594272e-07, + "loss": 0.3991, + "step": 12501 + }, + { + "epoch": 0.89, + "grad_norm": 1.9848195050935615, + "learning_rate": 3.3015817124940173e-07, + "loss": 0.4841, + "step": 12502 + }, + { + "epoch": 0.89, + "grad_norm": 2.100284456771557, + "learning_rate": 3.297476332329541e-07, + "loss": 0.4913, + "step": 12503 + }, + { + "epoch": 0.89, + "grad_norm": 1.63446412365419, + "learning_rate": 3.293373419150603e-07, + "loss": 0.5858, + "step": 12504 + }, + { + "epoch": 0.89, + "grad_norm": 0.7033022336517203, + "learning_rate": 3.289272973173946e-07, + "loss": 0.3955, + "step": 12505 + }, + { + "epoch": 0.89, + "grad_norm": 1.4432488644418342, + "learning_rate": 3.2851749946161693e-07, + "loss": 0.5152, + "step": 12506 + }, + { + "epoch": 0.89, + "grad_norm": 1.8373940527895882, + "learning_rate": 3.281079483693739e-07, + "loss": 0.5192, + "step": 12507 + }, + { + "epoch": 0.89, + "grad_norm": 2.3338120097010107, + "learning_rate": 3.2769864406230034e-07, + "loss": 0.5112, + "step": 12508 + }, + { + "epoch": 0.89, + "grad_norm": 1.743270895832969, + "learning_rate": 3.2728958656201514e-07, + "loss": 0.5017, + "step": 12509 + }, + { + "epoch": 0.89, + "grad_norm": 1.7438214920199944, + "learning_rate": 3.268807758901288e-07, + "loss": 0.5397, + "step": 12510 + }, + { + "epoch": 0.89, + "grad_norm": 1.6064161706536735, + "learning_rate": 3.2647221206823344e-07, + "loss": 0.5289, + "step": 12511 + }, + { + "epoch": 0.89, + "grad_norm": 1.8111676925005222, + "learning_rate": 3.260638951179118e-07, + "loss": 0.5583, + "step": 12512 + }, + { + "epoch": 0.89, + "grad_norm": 1.4370685474768852, + "learning_rate": 3.256558250607328e-07, + "loss": 0.4416, + "step": 12513 + }, + { + "epoch": 0.89, + "grad_norm": 2.336523808942497, + "learning_rate": 3.2524800191825246e-07, + "loss": 0.4698, + "step": 12514 + }, + { + "epoch": 0.89, + "grad_norm": 2.3897024487036687, + "learning_rate": 3.2484042571201303e-07, + "loss": 0.4804, + "step": 12515 + }, + { + "epoch": 0.89, + "grad_norm": 1.6478874253185325, + "learning_rate": 3.244330964635434e-07, + "loss": 0.503, + "step": 12516 + }, + { + "epoch": 0.89, + "grad_norm": 1.6339097201040622, + "learning_rate": 3.240260141943607e-07, + "loss": 0.5014, + "step": 12517 + }, + { + "epoch": 0.89, + "grad_norm": 1.7549066710919277, + "learning_rate": 3.2361917892596797e-07, + "loss": 0.5667, + "step": 12518 + }, + { + "epoch": 0.89, + "grad_norm": 1.763013971566291, + "learning_rate": 3.232125906798572e-07, + "loss": 0.4682, + "step": 12519 + }, + { + "epoch": 0.89, + "grad_norm": 0.6894127007137747, + "learning_rate": 3.228062494775025e-07, + "loss": 0.4368, + "step": 12520 + }, + { + "epoch": 0.89, + "grad_norm": 1.6539527381569308, + "learning_rate": 3.224001553403716e-07, + "loss": 0.5517, + "step": 12521 + }, + { + "epoch": 0.89, + "grad_norm": 2.1498594872265713, + "learning_rate": 3.2199430828991465e-07, + "loss": 0.5285, + "step": 12522 + }, + { + "epoch": 0.89, + "grad_norm": 1.6025105582973271, + "learning_rate": 3.2158870834756883e-07, + "loss": 0.5227, + "step": 12523 + }, + { + "epoch": 0.89, + "grad_norm": 0.7261506604621331, + "learning_rate": 3.2118335553475987e-07, + "loss": 0.4329, + "step": 12524 + }, + { + "epoch": 0.89, + "grad_norm": 1.8892685018211115, + "learning_rate": 3.2077824987290064e-07, + "loss": 0.553, + "step": 12525 + }, + { + "epoch": 0.89, + "grad_norm": 1.8131354960596509, + "learning_rate": 3.2037339138338953e-07, + "loss": 0.5207, + "step": 12526 + }, + { + "epoch": 0.89, + "grad_norm": 1.658046862623057, + "learning_rate": 3.199687800876128e-07, + "loss": 0.5233, + "step": 12527 + }, + { + "epoch": 0.89, + "grad_norm": 1.762672849967191, + "learning_rate": 3.195644160069428e-07, + "loss": 0.5317, + "step": 12528 + }, + { + "epoch": 0.89, + "grad_norm": 3.5410074322697236, + "learning_rate": 3.191602991627396e-07, + "loss": 0.5455, + "step": 12529 + }, + { + "epoch": 0.89, + "grad_norm": 0.6556822653568161, + "learning_rate": 3.1875642957635065e-07, + "loss": 0.3988, + "step": 12530 + }, + { + "epoch": 0.89, + "grad_norm": 1.8380604320401743, + "learning_rate": 3.1835280726911e-07, + "loss": 0.5576, + "step": 12531 + }, + { + "epoch": 0.89, + "grad_norm": 2.266337012110237, + "learning_rate": 3.1794943226233554e-07, + "loss": 0.4626, + "step": 12532 + }, + { + "epoch": 0.89, + "grad_norm": 1.4843844187881166, + "learning_rate": 3.1754630457733916e-07, + "loss": 0.4907, + "step": 12533 + }, + { + "epoch": 0.89, + "grad_norm": 1.8946230576440088, + "learning_rate": 3.1714342423541266e-07, + "loss": 0.5548, + "step": 12534 + }, + { + "epoch": 0.89, + "grad_norm": 1.8054242669318, + "learning_rate": 3.167407912578374e-07, + "loss": 0.4952, + "step": 12535 + }, + { + "epoch": 0.89, + "grad_norm": 1.879568913579606, + "learning_rate": 3.163384056658836e-07, + "loss": 0.508, + "step": 12536 + }, + { + "epoch": 0.89, + "grad_norm": 1.5317103805395762, + "learning_rate": 3.1593626748080477e-07, + "loss": 0.4839, + "step": 12537 + }, + { + "epoch": 0.89, + "grad_norm": 0.7229205522756116, + "learning_rate": 3.1553437672384557e-07, + "loss": 0.4472, + "step": 12538 + }, + { + "epoch": 0.89, + "grad_norm": 1.5554374743947448, + "learning_rate": 3.151327334162313e-07, + "loss": 0.4902, + "step": 12539 + }, + { + "epoch": 0.89, + "grad_norm": 1.6745436246203391, + "learning_rate": 3.1473133757918275e-07, + "loss": 0.4812, + "step": 12540 + }, + { + "epoch": 0.89, + "grad_norm": 1.7771640619395916, + "learning_rate": 3.1433018923390013e-07, + "loss": 0.5542, + "step": 12541 + }, + { + "epoch": 0.89, + "grad_norm": 1.6762317733017926, + "learning_rate": 3.139292884015738e-07, + "loss": 0.4388, + "step": 12542 + }, + { + "epoch": 0.89, + "grad_norm": 1.5690028516826968, + "learning_rate": 3.1352863510338117e-07, + "loss": 0.496, + "step": 12543 + }, + { + "epoch": 0.89, + "grad_norm": 0.7647092928342583, + "learning_rate": 3.131282293604859e-07, + "loss": 0.4433, + "step": 12544 + }, + { + "epoch": 0.89, + "grad_norm": 1.5617413529898307, + "learning_rate": 3.127280711940395e-07, + "loss": 0.5045, + "step": 12545 + }, + { + "epoch": 0.89, + "grad_norm": 0.7350365501430058, + "learning_rate": 3.123281606251782e-07, + "loss": 0.4554, + "step": 12546 + }, + { + "epoch": 0.89, + "grad_norm": 1.9042160581511811, + "learning_rate": 3.119284976750281e-07, + "loss": 0.5326, + "step": 12547 + }, + { + "epoch": 0.89, + "grad_norm": 1.6322990040816168, + "learning_rate": 3.1152908236469934e-07, + "loss": 0.5239, + "step": 12548 + }, + { + "epoch": 0.89, + "grad_norm": 1.5894684704674582, + "learning_rate": 3.111299147152913e-07, + "loss": 0.5038, + "step": 12549 + }, + { + "epoch": 0.89, + "grad_norm": 1.7244832665357233, + "learning_rate": 3.1073099474789037e-07, + "loss": 0.5489, + "step": 12550 + }, + { + "epoch": 0.89, + "grad_norm": 2.031632484963072, + "learning_rate": 3.103323224835658e-07, + "loss": 0.5945, + "step": 12551 + }, + { + "epoch": 0.89, + "grad_norm": 1.8206248078004774, + "learning_rate": 3.0993389794338027e-07, + "loss": 0.5517, + "step": 12552 + }, + { + "epoch": 0.89, + "grad_norm": 0.639279852441662, + "learning_rate": 3.0953572114837793e-07, + "loss": 0.4262, + "step": 12553 + }, + { + "epoch": 0.89, + "grad_norm": 1.5839584887986864, + "learning_rate": 3.0913779211959206e-07, + "loss": 0.4558, + "step": 12554 + }, + { + "epoch": 0.89, + "grad_norm": 1.905332512413174, + "learning_rate": 3.08740110878043e-07, + "loss": 0.4659, + "step": 12555 + }, + { + "epoch": 0.89, + "grad_norm": 1.6202315624452055, + "learning_rate": 3.0834267744473787e-07, + "loss": 0.4866, + "step": 12556 + }, + { + "epoch": 0.89, + "grad_norm": 1.6106166218712408, + "learning_rate": 3.0794549184066935e-07, + "loss": 0.5382, + "step": 12557 + }, + { + "epoch": 0.89, + "grad_norm": 1.626317132528925, + "learning_rate": 3.0754855408681894e-07, + "loss": 0.5479, + "step": 12558 + }, + { + "epoch": 0.89, + "grad_norm": 1.701276506268353, + "learning_rate": 3.0715186420415435e-07, + "loss": 0.5153, + "step": 12559 + }, + { + "epoch": 0.89, + "grad_norm": 1.8908114580824256, + "learning_rate": 3.067554222136293e-07, + "loss": 0.515, + "step": 12560 + }, + { + "epoch": 0.89, + "grad_norm": 1.8444150181119605, + "learning_rate": 3.063592281361866e-07, + "loss": 0.5675, + "step": 12561 + }, + { + "epoch": 0.89, + "grad_norm": 1.805427172360426, + "learning_rate": 3.0596328199275217e-07, + "loss": 0.5481, + "step": 12562 + }, + { + "epoch": 0.89, + "grad_norm": 1.8880506073447187, + "learning_rate": 3.0556758380424436e-07, + "loss": 0.5078, + "step": 12563 + }, + { + "epoch": 0.89, + "grad_norm": 1.6469998422406427, + "learning_rate": 3.051721335915631e-07, + "loss": 0.464, + "step": 12564 + }, + { + "epoch": 0.89, + "grad_norm": 1.6793441758707297, + "learning_rate": 3.047769313755977e-07, + "loss": 0.4899, + "step": 12565 + }, + { + "epoch": 0.89, + "grad_norm": 1.6333958925848486, + "learning_rate": 3.0438197717722497e-07, + "loss": 0.5495, + "step": 12566 + }, + { + "epoch": 0.89, + "grad_norm": 1.6768849836675201, + "learning_rate": 3.0398727101730643e-07, + "loss": 0.5177, + "step": 12567 + }, + { + "epoch": 0.89, + "grad_norm": 1.6107014437582887, + "learning_rate": 3.0359281291669375e-07, + "loss": 0.4841, + "step": 12568 + }, + { + "epoch": 0.89, + "grad_norm": 1.9567287909895485, + "learning_rate": 3.031986028962203e-07, + "loss": 0.5453, + "step": 12569 + }, + { + "epoch": 0.89, + "grad_norm": 1.9784228551109233, + "learning_rate": 3.0280464097671325e-07, + "loss": 0.492, + "step": 12570 + }, + { + "epoch": 0.89, + "grad_norm": 2.823760220653909, + "learning_rate": 3.0241092717898044e-07, + "loss": 0.4787, + "step": 12571 + }, + { + "epoch": 0.89, + "grad_norm": 1.5937144051798962, + "learning_rate": 3.0201746152382026e-07, + "loss": 0.5337, + "step": 12572 + }, + { + "epoch": 0.89, + "grad_norm": 1.6381979623448044, + "learning_rate": 3.0162424403201717e-07, + "loss": 0.5279, + "step": 12573 + }, + { + "epoch": 0.89, + "grad_norm": 1.659974824679718, + "learning_rate": 3.012312747243412e-07, + "loss": 0.5062, + "step": 12574 + }, + { + "epoch": 0.89, + "grad_norm": 1.902696925242269, + "learning_rate": 3.008385536215519e-07, + "loss": 0.5264, + "step": 12575 + }, + { + "epoch": 0.89, + "grad_norm": 1.9388114875813645, + "learning_rate": 3.004460807443921e-07, + "loss": 0.52, + "step": 12576 + }, + { + "epoch": 0.89, + "grad_norm": 1.9188063683712924, + "learning_rate": 3.000538561135946e-07, + "loss": 0.5571, + "step": 12577 + }, + { + "epoch": 0.89, + "grad_norm": 1.5599176770682812, + "learning_rate": 2.996618797498785e-07, + "loss": 0.5477, + "step": 12578 + }, + { + "epoch": 0.89, + "grad_norm": 1.6788869500385653, + "learning_rate": 2.9927015167394825e-07, + "loss": 0.5145, + "step": 12579 + }, + { + "epoch": 0.89, + "grad_norm": 1.7033205231160007, + "learning_rate": 2.9887867190649787e-07, + "loss": 0.5058, + "step": 12580 + }, + { + "epoch": 0.89, + "grad_norm": 1.702883391627166, + "learning_rate": 2.9848744046820365e-07, + "loss": 0.5591, + "step": 12581 + }, + { + "epoch": 0.89, + "grad_norm": 2.1593497753580824, + "learning_rate": 2.980964573797357e-07, + "loss": 0.5563, + "step": 12582 + }, + { + "epoch": 0.89, + "grad_norm": 1.7830795849894678, + "learning_rate": 2.977057226617447e-07, + "loss": 0.507, + "step": 12583 + }, + { + "epoch": 0.89, + "grad_norm": 1.7816462389028185, + "learning_rate": 2.9731523633487024e-07, + "loss": 0.4639, + "step": 12584 + }, + { + "epoch": 0.89, + "grad_norm": 1.7888232007306213, + "learning_rate": 2.969249984197403e-07, + "loss": 0.5313, + "step": 12585 + }, + { + "epoch": 0.89, + "grad_norm": 1.7156642066744452, + "learning_rate": 2.96535008936969e-07, + "loss": 0.5192, + "step": 12586 + }, + { + "epoch": 0.89, + "grad_norm": 1.9278711141420761, + "learning_rate": 2.961452679071547e-07, + "loss": 0.5825, + "step": 12587 + }, + { + "epoch": 0.89, + "grad_norm": 2.5511461940170626, + "learning_rate": 2.9575577535088607e-07, + "loss": 0.59, + "step": 12588 + }, + { + "epoch": 0.89, + "grad_norm": 1.6025922948800877, + "learning_rate": 2.953665312887388e-07, + "loss": 0.5198, + "step": 12589 + }, + { + "epoch": 0.89, + "grad_norm": 1.786652456663681, + "learning_rate": 2.9497753574127153e-07, + "loss": 0.5778, + "step": 12590 + }, + { + "epoch": 0.89, + "grad_norm": 1.6847330373715332, + "learning_rate": 2.9458878872903495e-07, + "loss": 0.4711, + "step": 12591 + }, + { + "epoch": 0.89, + "grad_norm": 2.428077130260277, + "learning_rate": 2.9420029027256103e-07, + "loss": 0.5304, + "step": 12592 + }, + { + "epoch": 0.89, + "grad_norm": 0.9019794540373057, + "learning_rate": 2.93812040392375e-07, + "loss": 0.4432, + "step": 12593 + }, + { + "epoch": 0.89, + "grad_norm": 0.7006085962224345, + "learning_rate": 2.934240391089826e-07, + "loss": 0.4138, + "step": 12594 + }, + { + "epoch": 0.89, + "grad_norm": 1.6378776511210356, + "learning_rate": 2.93036286442881e-07, + "loss": 0.4965, + "step": 12595 + }, + { + "epoch": 0.89, + "grad_norm": 1.8771780799177442, + "learning_rate": 2.926487824145513e-07, + "loss": 0.5017, + "step": 12596 + }, + { + "epoch": 0.89, + "grad_norm": 0.7068106309235032, + "learning_rate": 2.9226152704446455e-07, + "loss": 0.4148, + "step": 12597 + }, + { + "epoch": 0.89, + "grad_norm": 1.7191428484984292, + "learning_rate": 2.9187452035307596e-07, + "loss": 0.5169, + "step": 12598 + }, + { + "epoch": 0.89, + "grad_norm": 1.7077416030716195, + "learning_rate": 2.91487762360827e-07, + "loss": 0.5233, + "step": 12599 + }, + { + "epoch": 0.89, + "grad_norm": 1.6323401988794095, + "learning_rate": 2.911012530881507e-07, + "loss": 0.5946, + "step": 12600 + }, + { + "epoch": 0.89, + "grad_norm": 1.5653659064062897, + "learning_rate": 2.9071499255546196e-07, + "loss": 0.5506, + "step": 12601 + }, + { + "epoch": 0.89, + "grad_norm": 1.7244412780214051, + "learning_rate": 2.9032898078316384e-07, + "loss": 0.5394, + "step": 12602 + }, + { + "epoch": 0.89, + "grad_norm": 1.8543204034393266, + "learning_rate": 2.899432177916478e-07, + "loss": 0.5415, + "step": 12603 + }, + { + "epoch": 0.89, + "grad_norm": 1.9592909607014217, + "learning_rate": 2.895577036012909e-07, + "loss": 0.5133, + "step": 12604 + }, + { + "epoch": 0.89, + "grad_norm": 1.8897191564023803, + "learning_rate": 2.8917243823245786e-07, + "loss": 0.5757, + "step": 12605 + }, + { + "epoch": 0.89, + "grad_norm": 1.452172533661388, + "learning_rate": 2.8878742170549813e-07, + "loss": 0.4811, + "step": 12606 + }, + { + "epoch": 0.89, + "grad_norm": 2.0902452452399762, + "learning_rate": 2.8840265404075087e-07, + "loss": 0.5497, + "step": 12607 + }, + { + "epoch": 0.89, + "grad_norm": 1.825188805121277, + "learning_rate": 2.8801813525854036e-07, + "loss": 0.5859, + "step": 12608 + }, + { + "epoch": 0.89, + "grad_norm": 2.179325531989453, + "learning_rate": 2.8763386537917825e-07, + "loss": 0.5027, + "step": 12609 + }, + { + "epoch": 0.89, + "grad_norm": 1.714759902893055, + "learning_rate": 2.872498444229643e-07, + "loss": 0.5399, + "step": 12610 + }, + { + "epoch": 0.89, + "grad_norm": 1.7998267889267174, + "learning_rate": 2.868660724101807e-07, + "loss": 0.5383, + "step": 12611 + }, + { + "epoch": 0.89, + "grad_norm": 1.8301061314544274, + "learning_rate": 2.864825493611029e-07, + "loss": 0.4884, + "step": 12612 + }, + { + "epoch": 0.9, + "grad_norm": 1.712235098295122, + "learning_rate": 2.8609927529598737e-07, + "loss": 0.5014, + "step": 12613 + }, + { + "epoch": 0.9, + "grad_norm": 1.791156572768819, + "learning_rate": 2.8571625023508245e-07, + "loss": 0.5461, + "step": 12614 + }, + { + "epoch": 0.9, + "grad_norm": 1.659704413571172, + "learning_rate": 2.8533347419861745e-07, + "loss": 0.5316, + "step": 12615 + }, + { + "epoch": 0.9, + "grad_norm": 2.357412303161694, + "learning_rate": 2.849509472068146e-07, + "loss": 0.4891, + "step": 12616 + }, + { + "epoch": 0.9, + "grad_norm": 1.8635510415628678, + "learning_rate": 2.845686692798799e-07, + "loss": 0.5491, + "step": 12617 + }, + { + "epoch": 0.9, + "grad_norm": 0.6213281005722057, + "learning_rate": 2.84186640438005e-07, + "loss": 0.3846, + "step": 12618 + }, + { + "epoch": 0.9, + "grad_norm": 1.5512755701445857, + "learning_rate": 2.838048607013727e-07, + "loss": 0.532, + "step": 12619 + }, + { + "epoch": 0.9, + "grad_norm": 1.5270898148623333, + "learning_rate": 2.834233300901473e-07, + "loss": 0.5225, + "step": 12620 + }, + { + "epoch": 0.9, + "grad_norm": 1.900257539282859, + "learning_rate": 2.8304204862448445e-07, + "loss": 0.4717, + "step": 12621 + }, + { + "epoch": 0.9, + "grad_norm": 1.7455862971956742, + "learning_rate": 2.8266101632452246e-07, + "loss": 0.5642, + "step": 12622 + }, + { + "epoch": 0.9, + "grad_norm": 1.9468392384157973, + "learning_rate": 2.8228023321039135e-07, + "loss": 0.4935, + "step": 12623 + }, + { + "epoch": 0.9, + "grad_norm": 1.5536161381465765, + "learning_rate": 2.8189969930220327e-07, + "loss": 0.4681, + "step": 12624 + }, + { + "epoch": 0.9, + "grad_norm": 2.0191128188154113, + "learning_rate": 2.815194146200606e-07, + "loss": 0.5484, + "step": 12625 + }, + { + "epoch": 0.9, + "grad_norm": 0.7082325441488533, + "learning_rate": 2.8113937918405053e-07, + "loss": 0.4175, + "step": 12626 + }, + { + "epoch": 0.9, + "grad_norm": 1.841608082630723, + "learning_rate": 2.8075959301424814e-07, + "loss": 0.482, + "step": 12627 + }, + { + "epoch": 0.9, + "grad_norm": 2.301984529597994, + "learning_rate": 2.8038005613071626e-07, + "loss": 0.446, + "step": 12628 + }, + { + "epoch": 0.9, + "grad_norm": 2.304615487218864, + "learning_rate": 2.8000076855350056e-07, + "loss": 0.506, + "step": 12629 + }, + { + "epoch": 0.9, + "grad_norm": 1.8631016760100112, + "learning_rate": 2.796217303026383e-07, + "loss": 0.5567, + "step": 12630 + }, + { + "epoch": 0.9, + "grad_norm": 1.8419418107893775, + "learning_rate": 2.792429413981512e-07, + "loss": 0.5328, + "step": 12631 + }, + { + "epoch": 0.9, + "grad_norm": 1.3863056347575036, + "learning_rate": 2.788644018600478e-07, + "loss": 0.4372, + "step": 12632 + }, + { + "epoch": 0.9, + "grad_norm": 2.6542321668746274, + "learning_rate": 2.7848611170832376e-07, + "loss": 0.5249, + "step": 12633 + }, + { + "epoch": 0.9, + "grad_norm": 1.7860389743462115, + "learning_rate": 2.7810807096296246e-07, + "loss": 0.553, + "step": 12634 + }, + { + "epoch": 0.9, + "grad_norm": 1.6754011345895023, + "learning_rate": 2.777302796439335e-07, + "loss": 0.5174, + "step": 12635 + }, + { + "epoch": 0.9, + "grad_norm": 3.3393878176164904, + "learning_rate": 2.7735273777119153e-07, + "loss": 0.4955, + "step": 12636 + }, + { + "epoch": 0.9, + "grad_norm": 1.846853399911836, + "learning_rate": 2.769754453646806e-07, + "loss": 0.5158, + "step": 12637 + }, + { + "epoch": 0.9, + "grad_norm": 2.5353014385334527, + "learning_rate": 2.7659840244433025e-07, + "loss": 0.4891, + "step": 12638 + }, + { + "epoch": 0.9, + "grad_norm": 3.730672181262175, + "learning_rate": 2.7622160903005737e-07, + "loss": 0.4797, + "step": 12639 + }, + { + "epoch": 0.9, + "grad_norm": 2.1405968772232105, + "learning_rate": 2.75845065141766e-07, + "loss": 0.5258, + "step": 12640 + }, + { + "epoch": 0.9, + "grad_norm": 1.9748259792141227, + "learning_rate": 2.7546877079934473e-07, + "loss": 0.4872, + "step": 12641 + }, + { + "epoch": 0.9, + "grad_norm": 1.5172793288264743, + "learning_rate": 2.750927260226732e-07, + "loss": 0.4737, + "step": 12642 + }, + { + "epoch": 0.9, + "grad_norm": 1.6214503134802014, + "learning_rate": 2.7471693083161334e-07, + "loss": 0.4927, + "step": 12643 + }, + { + "epoch": 0.9, + "grad_norm": 1.6889251605129691, + "learning_rate": 2.7434138524601696e-07, + "loss": 0.4959, + "step": 12644 + }, + { + "epoch": 0.9, + "grad_norm": 2.108090182953655, + "learning_rate": 2.73966089285721e-07, + "loss": 0.4218, + "step": 12645 + }, + { + "epoch": 0.9, + "grad_norm": 1.768757353925345, + "learning_rate": 2.735910429705502e-07, + "loss": 0.4457, + "step": 12646 + }, + { + "epoch": 0.9, + "grad_norm": 1.6422180326770917, + "learning_rate": 2.7321624632031694e-07, + "loss": 0.5204, + "step": 12647 + }, + { + "epoch": 0.9, + "grad_norm": 1.604038047791915, + "learning_rate": 2.728416993548161e-07, + "loss": 0.513, + "step": 12648 + }, + { + "epoch": 0.9, + "grad_norm": 0.7361590662842092, + "learning_rate": 2.7246740209383617e-07, + "loss": 0.4134, + "step": 12649 + }, + { + "epoch": 0.9, + "grad_norm": 1.603988079819678, + "learning_rate": 2.7209335455714634e-07, + "loss": 0.5278, + "step": 12650 + }, + { + "epoch": 0.9, + "grad_norm": 1.60668062937063, + "learning_rate": 2.7171955676450645e-07, + "loss": 0.5254, + "step": 12651 + }, + { + "epoch": 0.9, + "grad_norm": 2.0539533623653017, + "learning_rate": 2.713460087356601e-07, + "loss": 0.4757, + "step": 12652 + }, + { + "epoch": 0.9, + "grad_norm": 1.6324746298878663, + "learning_rate": 2.709727104903409e-07, + "loss": 0.4641, + "step": 12653 + }, + { + "epoch": 0.9, + "grad_norm": 2.1978328787332275, + "learning_rate": 2.705996620482676e-07, + "loss": 0.5111, + "step": 12654 + }, + { + "epoch": 0.9, + "grad_norm": 1.8185328264679268, + "learning_rate": 2.702268634291455e-07, + "loss": 0.5951, + "step": 12655 + }, + { + "epoch": 0.9, + "grad_norm": 1.8206380725852163, + "learning_rate": 2.6985431465266666e-07, + "loss": 0.4388, + "step": 12656 + }, + { + "epoch": 0.9, + "grad_norm": 0.6721611403124952, + "learning_rate": 2.694820157385114e-07, + "loss": 0.4328, + "step": 12657 + }, + { + "epoch": 0.9, + "grad_norm": 1.6225581686512236, + "learning_rate": 2.6910996670634516e-07, + "loss": 0.5122, + "step": 12658 + }, + { + "epoch": 0.9, + "grad_norm": 1.8278943835333348, + "learning_rate": 2.687381675758211e-07, + "loss": 0.4824, + "step": 12659 + }, + { + "epoch": 0.9, + "grad_norm": 1.6864328605930328, + "learning_rate": 2.6836661836657783e-07, + "loss": 0.4953, + "step": 12660 + }, + { + "epoch": 0.9, + "grad_norm": 1.9774623394007422, + "learning_rate": 2.6799531909824315e-07, + "loss": 0.5883, + "step": 12661 + }, + { + "epoch": 0.9, + "grad_norm": 1.6623897926028357, + "learning_rate": 2.676242697904302e-07, + "loss": 0.6034, + "step": 12662 + }, + { + "epoch": 0.9, + "grad_norm": 1.8348595342446348, + "learning_rate": 2.6725347046273887e-07, + "loss": 0.4652, + "step": 12663 + }, + { + "epoch": 0.9, + "grad_norm": 1.8603397793981393, + "learning_rate": 2.6688292113475566e-07, + "loss": 0.5075, + "step": 12664 + }, + { + "epoch": 0.9, + "grad_norm": 1.6639125522391218, + "learning_rate": 2.6651262182605496e-07, + "loss": 0.4824, + "step": 12665 + }, + { + "epoch": 0.9, + "grad_norm": 0.7451529656300367, + "learning_rate": 2.661425725561967e-07, + "loss": 0.4178, + "step": 12666 + }, + { + "epoch": 0.9, + "grad_norm": 0.6879709862534997, + "learning_rate": 2.6577277334472793e-07, + "loss": 0.4113, + "step": 12667 + }, + { + "epoch": 0.9, + "grad_norm": 1.767935061910562, + "learning_rate": 2.6540322421118313e-07, + "loss": 0.588, + "step": 12668 + }, + { + "epoch": 0.9, + "grad_norm": 1.5908417034501903, + "learning_rate": 2.6503392517508275e-07, + "loss": 0.4362, + "step": 12669 + }, + { + "epoch": 0.9, + "grad_norm": 1.7741862559239043, + "learning_rate": 2.646648762559356e-07, + "loss": 0.4872, + "step": 12670 + }, + { + "epoch": 0.9, + "grad_norm": 0.6931247775428896, + "learning_rate": 2.6429607747323393e-07, + "loss": 0.4193, + "step": 12671 + }, + { + "epoch": 0.9, + "grad_norm": 1.8038879722615504, + "learning_rate": 2.6392752884646156e-07, + "loss": 0.5324, + "step": 12672 + }, + { + "epoch": 0.9, + "grad_norm": 1.6694481054008183, + "learning_rate": 2.6355923039508404e-07, + "loss": 0.5525, + "step": 12673 + }, + { + "epoch": 0.9, + "grad_norm": 1.658243421095066, + "learning_rate": 2.6319118213855745e-07, + "loss": 0.504, + "step": 12674 + }, + { + "epoch": 0.9, + "grad_norm": 1.4774394591598534, + "learning_rate": 2.628233840963235e-07, + "loss": 0.4234, + "step": 12675 + }, + { + "epoch": 0.9, + "grad_norm": 2.670672959642424, + "learning_rate": 2.624558362878099e-07, + "loss": 0.5109, + "step": 12676 + }, + { + "epoch": 0.9, + "grad_norm": 1.6106937230458371, + "learning_rate": 2.6208853873243233e-07, + "loss": 0.5176, + "step": 12677 + }, + { + "epoch": 0.9, + "grad_norm": 1.8188661357949891, + "learning_rate": 2.617214914495925e-07, + "loss": 0.6206, + "step": 12678 + }, + { + "epoch": 0.9, + "grad_norm": 1.8444630700234739, + "learning_rate": 2.613546944586781e-07, + "loss": 0.5398, + "step": 12679 + }, + { + "epoch": 0.9, + "grad_norm": 1.7965362265636515, + "learning_rate": 2.6098814777906603e-07, + "loss": 0.5131, + "step": 12680 + }, + { + "epoch": 0.9, + "grad_norm": 1.803197513668809, + "learning_rate": 2.606218514301179e-07, + "loss": 0.5984, + "step": 12681 + }, + { + "epoch": 0.9, + "grad_norm": 2.3723385823146947, + "learning_rate": 2.6025580543118266e-07, + "loss": 0.4581, + "step": 12682 + }, + { + "epoch": 0.9, + "grad_norm": 1.7404512966050116, + "learning_rate": 2.5989000980159605e-07, + "loss": 0.4451, + "step": 12683 + }, + { + "epoch": 0.9, + "grad_norm": 4.716802982532928, + "learning_rate": 2.5952446456068147e-07, + "loss": 0.5504, + "step": 12684 + }, + { + "epoch": 0.9, + "grad_norm": 1.7032712041715503, + "learning_rate": 2.5915916972774736e-07, + "loss": 0.5848, + "step": 12685 + }, + { + "epoch": 0.9, + "grad_norm": 1.897051450745246, + "learning_rate": 2.5879412532208993e-07, + "loss": 0.597, + "step": 12686 + }, + { + "epoch": 0.9, + "grad_norm": 0.746706573128253, + "learning_rate": 2.584293313629921e-07, + "loss": 0.4234, + "step": 12687 + }, + { + "epoch": 0.9, + "grad_norm": 1.9181149309020304, + "learning_rate": 2.58064787869724e-07, + "loss": 0.4781, + "step": 12688 + }, + { + "epoch": 0.9, + "grad_norm": 1.7920063547865288, + "learning_rate": 2.577004948615414e-07, + "loss": 0.5512, + "step": 12689 + }, + { + "epoch": 0.9, + "grad_norm": 0.7225141325912885, + "learning_rate": 2.5733645235768766e-07, + "loss": 0.4302, + "step": 12690 + }, + { + "epoch": 0.9, + "grad_norm": 1.494289814419068, + "learning_rate": 2.5697266037739253e-07, + "loss": 0.475, + "step": 12691 + }, + { + "epoch": 0.9, + "grad_norm": 2.3327465466111534, + "learning_rate": 2.5660911893987337e-07, + "loss": 0.559, + "step": 12692 + }, + { + "epoch": 0.9, + "grad_norm": 1.880594408493992, + "learning_rate": 2.562458280643343e-07, + "loss": 0.5588, + "step": 12693 + }, + { + "epoch": 0.9, + "grad_norm": 1.7360575729267824, + "learning_rate": 2.558827877699627e-07, + "loss": 0.4604, + "step": 12694 + }, + { + "epoch": 0.9, + "grad_norm": 1.5753545760948986, + "learning_rate": 2.555199980759393e-07, + "loss": 0.5202, + "step": 12695 + }, + { + "epoch": 0.9, + "grad_norm": 1.5839456085661119, + "learning_rate": 2.55157459001425e-07, + "loss": 0.4664, + "step": 12696 + }, + { + "epoch": 0.9, + "grad_norm": 1.657120155041787, + "learning_rate": 2.5479517056557214e-07, + "loss": 0.5251, + "step": 12697 + }, + { + "epoch": 0.9, + "grad_norm": 1.7245698023094969, + "learning_rate": 2.5443313278751656e-07, + "loss": 0.4897, + "step": 12698 + }, + { + "epoch": 0.9, + "grad_norm": 1.7103033677898396, + "learning_rate": 2.5407134568638357e-07, + "loss": 0.5476, + "step": 12699 + }, + { + "epoch": 0.9, + "grad_norm": 1.6237081031423706, + "learning_rate": 2.53709809281284e-07, + "loss": 0.5127, + "step": 12700 + }, + { + "epoch": 0.9, + "grad_norm": 1.8289877525626415, + "learning_rate": 2.5334852359131356e-07, + "loss": 0.539, + "step": 12701 + }, + { + "epoch": 0.9, + "grad_norm": 1.8459814438274638, + "learning_rate": 2.5298748863555935e-07, + "loss": 0.54, + "step": 12702 + }, + { + "epoch": 0.9, + "grad_norm": 1.3871047095490303, + "learning_rate": 2.5262670443309045e-07, + "loss": 0.4235, + "step": 12703 + }, + { + "epoch": 0.9, + "grad_norm": 2.3502516099606585, + "learning_rate": 2.5226617100296504e-07, + "loss": 0.4722, + "step": 12704 + }, + { + "epoch": 0.9, + "grad_norm": 0.652451729796664, + "learning_rate": 2.5190588836422835e-07, + "loss": 0.4163, + "step": 12705 + }, + { + "epoch": 0.9, + "grad_norm": 2.10700019892435, + "learning_rate": 2.515458565359108e-07, + "loss": 0.5467, + "step": 12706 + }, + { + "epoch": 0.9, + "grad_norm": 1.5844130841816766, + "learning_rate": 2.5118607553703214e-07, + "loss": 0.5514, + "step": 12707 + }, + { + "epoch": 0.9, + "grad_norm": 1.7769491683970342, + "learning_rate": 2.5082654538659546e-07, + "loss": 0.4792, + "step": 12708 + }, + { + "epoch": 0.9, + "grad_norm": 1.7423771811779871, + "learning_rate": 2.504672661035934e-07, + "loss": 0.5378, + "step": 12709 + }, + { + "epoch": 0.9, + "grad_norm": 1.5805357750008233, + "learning_rate": 2.5010823770700354e-07, + "loss": 0.4955, + "step": 12710 + }, + { + "epoch": 0.9, + "grad_norm": 0.7128316440233982, + "learning_rate": 2.4974946021579125e-07, + "loss": 0.442, + "step": 12711 + }, + { + "epoch": 0.9, + "grad_norm": 1.8006057306944947, + "learning_rate": 2.493909336489092e-07, + "loss": 0.5045, + "step": 12712 + }, + { + "epoch": 0.9, + "grad_norm": 1.568745433139465, + "learning_rate": 2.490326580252944e-07, + "loss": 0.4581, + "step": 12713 + }, + { + "epoch": 0.9, + "grad_norm": 1.909795599980966, + "learning_rate": 2.4867463336387454e-07, + "loss": 0.5457, + "step": 12714 + }, + { + "epoch": 0.9, + "grad_norm": 1.823375250464417, + "learning_rate": 2.483168596835589e-07, + "loss": 0.5163, + "step": 12715 + }, + { + "epoch": 0.9, + "grad_norm": 1.6976184050426828, + "learning_rate": 2.4795933700324846e-07, + "loss": 0.5288, + "step": 12716 + }, + { + "epoch": 0.9, + "grad_norm": 2.866068915370391, + "learning_rate": 2.476020653418271e-07, + "loss": 0.4138, + "step": 12717 + }, + { + "epoch": 0.9, + "grad_norm": 1.8702720226616825, + "learning_rate": 2.47245044718169e-07, + "loss": 0.5093, + "step": 12718 + }, + { + "epoch": 0.9, + "grad_norm": 2.4102874474745914, + "learning_rate": 2.4688827515113143e-07, + "loss": 0.6072, + "step": 12719 + }, + { + "epoch": 0.9, + "grad_norm": 0.7765533453112328, + "learning_rate": 2.465317566595604e-07, + "loss": 0.4422, + "step": 12720 + }, + { + "epoch": 0.9, + "grad_norm": 1.7631187701441917, + "learning_rate": 2.461754892622903e-07, + "loss": 0.5153, + "step": 12721 + }, + { + "epoch": 0.9, + "grad_norm": 1.8474875714521757, + "learning_rate": 2.4581947297813825e-07, + "loss": 0.5064, + "step": 12722 + }, + { + "epoch": 0.9, + "grad_norm": 1.5823223189861697, + "learning_rate": 2.4546370782591147e-07, + "loss": 0.4803, + "step": 12723 + }, + { + "epoch": 0.9, + "grad_norm": 1.8557883592668791, + "learning_rate": 2.45108193824401e-07, + "loss": 0.504, + "step": 12724 + }, + { + "epoch": 0.9, + "grad_norm": 1.695964157990849, + "learning_rate": 2.4475293099238905e-07, + "loss": 0.5327, + "step": 12725 + }, + { + "epoch": 0.9, + "grad_norm": 1.6248062663050375, + "learning_rate": 2.4439791934863957e-07, + "loss": 0.5034, + "step": 12726 + }, + { + "epoch": 0.9, + "grad_norm": 1.6775142440015796, + "learning_rate": 2.440431589119058e-07, + "loss": 0.5394, + "step": 12727 + }, + { + "epoch": 0.9, + "grad_norm": 1.5742147916582065, + "learning_rate": 2.4368864970092776e-07, + "loss": 0.492, + "step": 12728 + }, + { + "epoch": 0.9, + "grad_norm": 1.4891188143072773, + "learning_rate": 2.433343917344322e-07, + "loss": 0.5095, + "step": 12729 + }, + { + "epoch": 0.9, + "grad_norm": 1.8458558350587122, + "learning_rate": 2.429803850311324e-07, + "loss": 0.6044, + "step": 12730 + }, + { + "epoch": 0.9, + "grad_norm": 1.4149480400706864, + "learning_rate": 2.426266296097263e-07, + "loss": 0.5193, + "step": 12731 + }, + { + "epoch": 0.9, + "grad_norm": 0.668582836047589, + "learning_rate": 2.4227312548890325e-07, + "loss": 0.417, + "step": 12732 + }, + { + "epoch": 0.9, + "grad_norm": 1.696232849546226, + "learning_rate": 2.41919872687334e-07, + "loss": 0.4873, + "step": 12733 + }, + { + "epoch": 0.9, + "grad_norm": 1.9994363840717644, + "learning_rate": 2.4156687122368016e-07, + "loss": 0.4863, + "step": 12734 + }, + { + "epoch": 0.9, + "grad_norm": 1.6550694503793202, + "learning_rate": 2.4121412111658746e-07, + "loss": 0.4913, + "step": 12735 + }, + { + "epoch": 0.9, + "grad_norm": 2.9173395497030055, + "learning_rate": 2.4086162238469045e-07, + "loss": 0.5509, + "step": 12736 + }, + { + "epoch": 0.9, + "grad_norm": 1.6190433159642212, + "learning_rate": 2.4050937504660866e-07, + "loss": 0.5305, + "step": 12737 + }, + { + "epoch": 0.9, + "grad_norm": 1.6013913139188896, + "learning_rate": 2.401573791209488e-07, + "loss": 0.4359, + "step": 12738 + }, + { + "epoch": 0.9, + "grad_norm": 1.7469842535438236, + "learning_rate": 2.39805634626305e-07, + "loss": 0.5356, + "step": 12739 + }, + { + "epoch": 0.9, + "grad_norm": 1.6316081275779228, + "learning_rate": 2.394541415812568e-07, + "loss": 0.5112, + "step": 12740 + }, + { + "epoch": 0.9, + "grad_norm": 1.6196806625098836, + "learning_rate": 2.3910290000437207e-07, + "loss": 0.5357, + "step": 12741 + }, + { + "epoch": 0.9, + "grad_norm": 5.549438197176269, + "learning_rate": 2.387519099142049e-07, + "loss": 0.4662, + "step": 12742 + }, + { + "epoch": 0.9, + "grad_norm": 1.5884896490620557, + "learning_rate": 2.384011713292933e-07, + "loss": 0.461, + "step": 12743 + }, + { + "epoch": 0.9, + "grad_norm": 1.5861294807642932, + "learning_rate": 2.3805068426816847e-07, + "loss": 0.5202, + "step": 12744 + }, + { + "epoch": 0.9, + "grad_norm": 7.800853640963795, + "learning_rate": 2.3770044874934117e-07, + "loss": 0.5534, + "step": 12745 + }, + { + "epoch": 0.9, + "grad_norm": 0.7327610298248551, + "learning_rate": 2.3735046479131274e-07, + "loss": 0.4465, + "step": 12746 + }, + { + "epoch": 0.9, + "grad_norm": 1.78794757616712, + "learning_rate": 2.3700073241257117e-07, + "loss": 0.461, + "step": 12747 + }, + { + "epoch": 0.9, + "grad_norm": 1.6992487112074224, + "learning_rate": 2.3665125163159052e-07, + "loss": 0.5507, + "step": 12748 + }, + { + "epoch": 0.9, + "grad_norm": 1.7969576077098495, + "learning_rate": 2.3630202246683109e-07, + "loss": 0.5212, + "step": 12749 + }, + { + "epoch": 0.9, + "grad_norm": 1.8448275415665345, + "learning_rate": 2.3595304493673977e-07, + "loss": 0.523, + "step": 12750 + }, + { + "epoch": 0.9, + "grad_norm": 1.9400387095410139, + "learning_rate": 2.3560431905975234e-07, + "loss": 0.5918, + "step": 12751 + }, + { + "epoch": 0.9, + "grad_norm": 2.4842316301573715, + "learning_rate": 2.3525584485428799e-07, + "loss": 0.4888, + "step": 12752 + }, + { + "epoch": 0.9, + "grad_norm": 2.094220179435843, + "learning_rate": 2.349076223387564e-07, + "loss": 0.506, + "step": 12753 + }, + { + "epoch": 0.91, + "grad_norm": 1.5490564310415358, + "learning_rate": 2.3455965153154847e-07, + "loss": 0.5303, + "step": 12754 + }, + { + "epoch": 0.91, + "grad_norm": 1.613553412728933, + "learning_rate": 2.342119324510489e-07, + "loss": 0.5622, + "step": 12755 + }, + { + "epoch": 0.91, + "grad_norm": 1.876349133523773, + "learning_rate": 2.33864465115623e-07, + "loss": 0.5674, + "step": 12756 + }, + { + "epoch": 0.91, + "grad_norm": 1.4581458478697864, + "learning_rate": 2.3351724954362608e-07, + "loss": 0.5018, + "step": 12757 + }, + { + "epoch": 0.91, + "grad_norm": 1.532832900762323, + "learning_rate": 2.3317028575339906e-07, + "loss": 0.5139, + "step": 12758 + }, + { + "epoch": 0.91, + "grad_norm": 0.7173337432027899, + "learning_rate": 2.3282357376326947e-07, + "loss": 0.4211, + "step": 12759 + }, + { + "epoch": 0.91, + "grad_norm": 1.6956049801149211, + "learning_rate": 2.3247711359155324e-07, + "loss": 0.5289, + "step": 12760 + }, + { + "epoch": 0.91, + "grad_norm": 2.1449348835594253, + "learning_rate": 2.3213090525654902e-07, + "loss": 0.5615, + "step": 12761 + }, + { + "epoch": 0.91, + "grad_norm": 1.6784218486321325, + "learning_rate": 2.3178494877654724e-07, + "loss": 0.4773, + "step": 12762 + }, + { + "epoch": 0.91, + "grad_norm": 1.888948000713616, + "learning_rate": 2.3143924416982044e-07, + "loss": 0.5595, + "step": 12763 + }, + { + "epoch": 0.91, + "grad_norm": 0.7191948867790153, + "learning_rate": 2.3109379145463128e-07, + "loss": 0.4302, + "step": 12764 + }, + { + "epoch": 0.91, + "grad_norm": 2.529244734370472, + "learning_rate": 2.307485906492274e-07, + "loss": 0.4695, + "step": 12765 + }, + { + "epoch": 0.91, + "grad_norm": 1.6502624377433872, + "learning_rate": 2.3040364177184305e-07, + "loss": 0.5345, + "step": 12766 + }, + { + "epoch": 0.91, + "grad_norm": 1.870126874704981, + "learning_rate": 2.3005894484070092e-07, + "loss": 0.5905, + "step": 12767 + }, + { + "epoch": 0.91, + "grad_norm": 1.8569725551205007, + "learning_rate": 2.297144998740075e-07, + "loss": 0.5435, + "step": 12768 + }, + { + "epoch": 0.91, + "grad_norm": 1.8362850571045017, + "learning_rate": 2.2937030688995776e-07, + "loss": 0.5202, + "step": 12769 + }, + { + "epoch": 0.91, + "grad_norm": 2.2351174278379373, + "learning_rate": 2.2902636590673376e-07, + "loss": 0.5478, + "step": 12770 + }, + { + "epoch": 0.91, + "grad_norm": 1.6411435966691348, + "learning_rate": 2.2868267694250322e-07, + "loss": 0.5211, + "step": 12771 + }, + { + "epoch": 0.91, + "grad_norm": 1.7264879993726723, + "learning_rate": 2.2833924001542163e-07, + "loss": 0.5053, + "step": 12772 + }, + { + "epoch": 0.91, + "grad_norm": 0.665588816249034, + "learning_rate": 2.2799605514362888e-07, + "loss": 0.4269, + "step": 12773 + }, + { + "epoch": 0.91, + "grad_norm": 1.713833009086183, + "learning_rate": 2.276531223452555e-07, + "loss": 0.5101, + "step": 12774 + }, + { + "epoch": 0.91, + "grad_norm": 1.4470838559102182, + "learning_rate": 2.2731044163841477e-07, + "loss": 0.4818, + "step": 12775 + }, + { + "epoch": 0.91, + "grad_norm": 2.115648654273564, + "learning_rate": 2.269680130412083e-07, + "loss": 0.5013, + "step": 12776 + }, + { + "epoch": 0.91, + "grad_norm": 1.506165745728885, + "learning_rate": 2.2662583657172444e-07, + "loss": 0.4877, + "step": 12777 + }, + { + "epoch": 0.91, + "grad_norm": 1.7364834039667563, + "learning_rate": 2.2628391224803868e-07, + "loss": 0.4255, + "step": 12778 + }, + { + "epoch": 0.91, + "grad_norm": 2.3298774433007754, + "learning_rate": 2.2594224008821276e-07, + "loss": 0.4997, + "step": 12779 + }, + { + "epoch": 0.91, + "grad_norm": 2.676868802700795, + "learning_rate": 2.2560082011029326e-07, + "loss": 0.5058, + "step": 12780 + }, + { + "epoch": 0.91, + "grad_norm": 0.682350249259026, + "learning_rate": 2.252596523323175e-07, + "loss": 0.4304, + "step": 12781 + }, + { + "epoch": 0.91, + "grad_norm": 1.702551003384806, + "learning_rate": 2.2491873677230492e-07, + "loss": 0.4959, + "step": 12782 + }, + { + "epoch": 0.91, + "grad_norm": 1.5473745289080922, + "learning_rate": 2.245780734482661e-07, + "loss": 0.5503, + "step": 12783 + }, + { + "epoch": 0.91, + "grad_norm": 2.2596058206607212, + "learning_rate": 2.2423766237819332e-07, + "loss": 0.4786, + "step": 12784 + }, + { + "epoch": 0.91, + "grad_norm": 0.6857643862198614, + "learning_rate": 2.2389750358007e-07, + "loss": 0.4314, + "step": 12785 + }, + { + "epoch": 0.91, + "grad_norm": 1.752986504938983, + "learning_rate": 2.235575970718651e-07, + "loss": 0.4954, + "step": 12786 + }, + { + "epoch": 0.91, + "grad_norm": 2.6651851325089138, + "learning_rate": 2.2321794287153198e-07, + "loss": 0.486, + "step": 12787 + }, + { + "epoch": 0.91, + "grad_norm": 1.8999645346459915, + "learning_rate": 2.2287854099701301e-07, + "loss": 0.4675, + "step": 12788 + }, + { + "epoch": 0.91, + "grad_norm": 1.7010455351632758, + "learning_rate": 2.225393914662366e-07, + "loss": 0.516, + "step": 12789 + }, + { + "epoch": 0.91, + "grad_norm": 1.6378901022642982, + "learning_rate": 2.22200494297119e-07, + "loss": 0.484, + "step": 12790 + }, + { + "epoch": 0.91, + "grad_norm": 1.9202971033130536, + "learning_rate": 2.2186184950755862e-07, + "loss": 0.5384, + "step": 12791 + }, + { + "epoch": 0.91, + "grad_norm": 1.6454756158899664, + "learning_rate": 2.2152345711544732e-07, + "loss": 0.5059, + "step": 12792 + }, + { + "epoch": 0.91, + "grad_norm": 1.8749689371584566, + "learning_rate": 2.2118531713865854e-07, + "loss": 0.5171, + "step": 12793 + }, + { + "epoch": 0.91, + "grad_norm": 1.7172298075393906, + "learning_rate": 2.2084742959505358e-07, + "loss": 0.5601, + "step": 12794 + }, + { + "epoch": 0.91, + "grad_norm": 2.884477347158867, + "learning_rate": 2.20509794502482e-07, + "loss": 0.5585, + "step": 12795 + }, + { + "epoch": 0.91, + "grad_norm": 1.6126760046040245, + "learning_rate": 2.2017241187877736e-07, + "loss": 0.5126, + "step": 12796 + }, + { + "epoch": 0.91, + "grad_norm": 1.9760093410394666, + "learning_rate": 2.1983528174176372e-07, + "loss": 0.5094, + "step": 12797 + }, + { + "epoch": 0.91, + "grad_norm": 1.8418336596828138, + "learning_rate": 2.1949840410924682e-07, + "loss": 0.467, + "step": 12798 + }, + { + "epoch": 0.91, + "grad_norm": 0.6182225135299495, + "learning_rate": 2.1916177899902248e-07, + "loss": 0.3742, + "step": 12799 + }, + { + "epoch": 0.91, + "grad_norm": 1.7412256932121002, + "learning_rate": 2.1882540642887308e-07, + "loss": 0.5352, + "step": 12800 + }, + { + "epoch": 0.91, + "grad_norm": 1.6314739736421984, + "learning_rate": 2.1848928641656664e-07, + "loss": 0.468, + "step": 12801 + }, + { + "epoch": 0.91, + "grad_norm": 1.7001496155469862, + "learning_rate": 2.1815341897985842e-07, + "loss": 0.4898, + "step": 12802 + }, + { + "epoch": 0.91, + "grad_norm": 2.3657625861140903, + "learning_rate": 2.178178041364881e-07, + "loss": 0.4784, + "step": 12803 + }, + { + "epoch": 0.91, + "grad_norm": 1.8984403873119962, + "learning_rate": 2.1748244190418766e-07, + "loss": 0.5652, + "step": 12804 + }, + { + "epoch": 0.91, + "grad_norm": 1.6452831027597885, + "learning_rate": 2.1714733230066897e-07, + "loss": 0.5233, + "step": 12805 + }, + { + "epoch": 0.91, + "grad_norm": 2.1753818657430046, + "learning_rate": 2.168124753436346e-07, + "loss": 0.5023, + "step": 12806 + }, + { + "epoch": 0.91, + "grad_norm": 1.4615127118840312, + "learning_rate": 2.164778710507731e-07, + "loss": 0.4927, + "step": 12807 + }, + { + "epoch": 0.91, + "grad_norm": 0.6904294455096526, + "learning_rate": 2.1614351943975932e-07, + "loss": 0.3938, + "step": 12808 + }, + { + "epoch": 0.91, + "grad_norm": 1.444772360065789, + "learning_rate": 2.1580942052825515e-07, + "loss": 0.4256, + "step": 12809 + }, + { + "epoch": 0.91, + "grad_norm": 2.085776857037535, + "learning_rate": 2.1547557433390765e-07, + "loss": 0.494, + "step": 12810 + }, + { + "epoch": 0.91, + "grad_norm": 1.7325796695439903, + "learning_rate": 2.1514198087435322e-07, + "loss": 0.4897, + "step": 12811 + }, + { + "epoch": 0.91, + "grad_norm": 1.546304275013161, + "learning_rate": 2.1480864016721226e-07, + "loss": 0.4879, + "step": 12812 + }, + { + "epoch": 0.91, + "grad_norm": 2.4075575403422236, + "learning_rate": 2.1447555223009341e-07, + "loss": 0.571, + "step": 12813 + }, + { + "epoch": 0.91, + "grad_norm": 2.3589662735500236, + "learning_rate": 2.1414271708059153e-07, + "loss": 0.4887, + "step": 12814 + }, + { + "epoch": 0.91, + "grad_norm": 1.6775011186105564, + "learning_rate": 2.1381013473628754e-07, + "loss": 0.5393, + "step": 12815 + }, + { + "epoch": 0.91, + "grad_norm": 3.1084847057922835, + "learning_rate": 2.1347780521475126e-07, + "loss": 0.5254, + "step": 12816 + }, + { + "epoch": 0.91, + "grad_norm": 2.344582001195526, + "learning_rate": 2.1314572853353532e-07, + "loss": 0.5255, + "step": 12817 + }, + { + "epoch": 0.91, + "grad_norm": 1.8860609995611886, + "learning_rate": 2.1281390471018237e-07, + "loss": 0.4888, + "step": 12818 + }, + { + "epoch": 0.91, + "grad_norm": 1.6237938495543278, + "learning_rate": 2.1248233376222004e-07, + "loss": 0.5642, + "step": 12819 + }, + { + "epoch": 0.91, + "grad_norm": 1.4989483710994105, + "learning_rate": 2.1215101570716378e-07, + "loss": 0.4663, + "step": 12820 + }, + { + "epoch": 0.91, + "grad_norm": 1.8839064457118004, + "learning_rate": 2.1181995056251347e-07, + "loss": 0.4967, + "step": 12821 + }, + { + "epoch": 0.91, + "grad_norm": 2.9827257600123684, + "learning_rate": 2.1148913834575792e-07, + "loss": 0.5444, + "step": 12822 + }, + { + "epoch": 0.91, + "grad_norm": 0.699994246513223, + "learning_rate": 2.1115857907437198e-07, + "loss": 0.4297, + "step": 12823 + }, + { + "epoch": 0.91, + "grad_norm": 1.6446379236036026, + "learning_rate": 2.1082827276581675e-07, + "loss": 0.5753, + "step": 12824 + }, + { + "epoch": 0.91, + "grad_norm": 2.0595384597828805, + "learning_rate": 2.1049821943754046e-07, + "loss": 0.5815, + "step": 12825 + }, + { + "epoch": 0.91, + "grad_norm": 1.3846848506717584, + "learning_rate": 2.101684191069764e-07, + "loss": 0.4734, + "step": 12826 + }, + { + "epoch": 0.91, + "grad_norm": 1.8086867435699205, + "learning_rate": 2.0983887179154783e-07, + "loss": 0.5251, + "step": 12827 + }, + { + "epoch": 0.91, + "grad_norm": 0.7125358048893002, + "learning_rate": 2.095095775086603e-07, + "loss": 0.4274, + "step": 12828 + }, + { + "epoch": 0.91, + "grad_norm": 1.8489682335404216, + "learning_rate": 2.091805362757099e-07, + "loss": 0.5189, + "step": 12829 + }, + { + "epoch": 0.91, + "grad_norm": 2.6101176511957074, + "learning_rate": 2.0885174811007657e-07, + "loss": 0.556, + "step": 12830 + }, + { + "epoch": 0.91, + "grad_norm": 2.0717716811709175, + "learning_rate": 2.085232130291287e-07, + "loss": 0.6026, + "step": 12831 + }, + { + "epoch": 0.91, + "grad_norm": 2.2821167853749027, + "learning_rate": 2.0819493105022125e-07, + "loss": 0.5138, + "step": 12832 + }, + { + "epoch": 0.91, + "grad_norm": 1.8749203596707527, + "learning_rate": 2.078669021906926e-07, + "loss": 0.4689, + "step": 12833 + }, + { + "epoch": 0.91, + "grad_norm": 1.8277718936205098, + "learning_rate": 2.075391264678739e-07, + "loss": 0.5867, + "step": 12834 + }, + { + "epoch": 0.91, + "grad_norm": 1.8844913121038056, + "learning_rate": 2.072116038990768e-07, + "loss": 0.5441, + "step": 12835 + }, + { + "epoch": 0.91, + "grad_norm": 2.550975307952143, + "learning_rate": 2.0688433450160305e-07, + "loss": 0.5453, + "step": 12836 + }, + { + "epoch": 0.91, + "grad_norm": 2.023836812982778, + "learning_rate": 2.0655731829273994e-07, + "loss": 0.5032, + "step": 12837 + }, + { + "epoch": 0.91, + "grad_norm": 1.783704573568617, + "learning_rate": 2.06230555289762e-07, + "loss": 0.593, + "step": 12838 + }, + { + "epoch": 0.91, + "grad_norm": 2.114186099743231, + "learning_rate": 2.0590404550992982e-07, + "loss": 0.5509, + "step": 12839 + }, + { + "epoch": 0.91, + "grad_norm": 2.5462700982895363, + "learning_rate": 2.0557778897048963e-07, + "loss": 0.5157, + "step": 12840 + }, + { + "epoch": 0.91, + "grad_norm": 2.2016694914443002, + "learning_rate": 2.052517856886771e-07, + "loss": 0.4619, + "step": 12841 + }, + { + "epoch": 0.91, + "grad_norm": 1.8559301982643677, + "learning_rate": 2.0492603568171177e-07, + "loss": 0.5437, + "step": 12842 + }, + { + "epoch": 0.91, + "grad_norm": 2.081394745303622, + "learning_rate": 2.0460053896680154e-07, + "loss": 0.5655, + "step": 12843 + }, + { + "epoch": 0.91, + "grad_norm": 0.6221447411911857, + "learning_rate": 2.0427529556113935e-07, + "loss": 0.3869, + "step": 12844 + }, + { + "epoch": 0.91, + "grad_norm": 1.662511679133034, + "learning_rate": 2.0395030548190642e-07, + "loss": 0.4644, + "step": 12845 + }, + { + "epoch": 0.91, + "grad_norm": 1.7591217520153677, + "learning_rate": 2.0362556874627014e-07, + "loss": 0.5076, + "step": 12846 + }, + { + "epoch": 0.91, + "grad_norm": 2.260989049018104, + "learning_rate": 2.0330108537138294e-07, + "loss": 0.4696, + "step": 12847 + }, + { + "epoch": 0.91, + "grad_norm": 0.7195526476533084, + "learning_rate": 2.0297685537438606e-07, + "loss": 0.4231, + "step": 12848 + }, + { + "epoch": 0.91, + "grad_norm": 1.9764874279863391, + "learning_rate": 2.0265287877240581e-07, + "loss": 0.5531, + "step": 12849 + }, + { + "epoch": 0.91, + "grad_norm": 1.4763259200621193, + "learning_rate": 2.0232915558255738e-07, + "loss": 0.4975, + "step": 12850 + }, + { + "epoch": 0.91, + "grad_norm": 0.7424916912727368, + "learning_rate": 2.0200568582193881e-07, + "loss": 0.4222, + "step": 12851 + }, + { + "epoch": 0.91, + "grad_norm": 2.688235911427795, + "learning_rate": 2.0168246950763693e-07, + "loss": 0.5065, + "step": 12852 + }, + { + "epoch": 0.91, + "grad_norm": 0.6188613677531778, + "learning_rate": 2.01359506656727e-07, + "loss": 0.4055, + "step": 12853 + }, + { + "epoch": 0.91, + "grad_norm": 1.5553440897948918, + "learning_rate": 2.0103679728626758e-07, + "loss": 0.4857, + "step": 12854 + }, + { + "epoch": 0.91, + "grad_norm": 1.377726817849253, + "learning_rate": 2.007143414133067e-07, + "loss": 0.4942, + "step": 12855 + }, + { + "epoch": 0.91, + "grad_norm": 1.6666616184780845, + "learning_rate": 2.0039213905487465e-07, + "loss": 0.4693, + "step": 12856 + }, + { + "epoch": 0.91, + "grad_norm": 1.7864468025010933, + "learning_rate": 2.0007019022799445e-07, + "loss": 0.5379, + "step": 12857 + }, + { + "epoch": 0.91, + "grad_norm": 2.0977212474888742, + "learning_rate": 1.9974849494967086e-07, + "loss": 0.4979, + "step": 12858 + }, + { + "epoch": 0.91, + "grad_norm": 2.030358419235614, + "learning_rate": 1.9942705323689694e-07, + "loss": 0.4893, + "step": 12859 + }, + { + "epoch": 0.91, + "grad_norm": 1.7011193368483453, + "learning_rate": 1.9910586510665299e-07, + "loss": 0.4738, + "step": 12860 + }, + { + "epoch": 0.91, + "grad_norm": 1.9230292016443153, + "learning_rate": 1.9878493057590432e-07, + "loss": 0.5181, + "step": 12861 + }, + { + "epoch": 0.91, + "grad_norm": 1.7884005325590633, + "learning_rate": 1.9846424966160515e-07, + "loss": 0.4777, + "step": 12862 + }, + { + "epoch": 0.91, + "grad_norm": 1.7575842579734136, + "learning_rate": 1.9814382238069308e-07, + "loss": 0.5276, + "step": 12863 + }, + { + "epoch": 0.91, + "grad_norm": 2.2006834522212837, + "learning_rate": 1.9782364875009673e-07, + "loss": 0.4576, + "step": 12864 + }, + { + "epoch": 0.91, + "grad_norm": 1.626273087926516, + "learning_rate": 1.9750372878672596e-07, + "loss": 0.5338, + "step": 12865 + }, + { + "epoch": 0.91, + "grad_norm": 1.5913212737346931, + "learning_rate": 1.9718406250748167e-07, + "loss": 0.5507, + "step": 12866 + }, + { + "epoch": 0.91, + "grad_norm": 1.495878469826059, + "learning_rate": 1.9686464992924925e-07, + "loss": 0.4775, + "step": 12867 + }, + { + "epoch": 0.91, + "grad_norm": 1.9906105379181938, + "learning_rate": 1.965454910689013e-07, + "loss": 0.4879, + "step": 12868 + }, + { + "epoch": 0.91, + "grad_norm": 1.7350232831013752, + "learning_rate": 1.962265859432977e-07, + "loss": 0.5511, + "step": 12869 + }, + { + "epoch": 0.91, + "grad_norm": 7.556782631794976, + "learning_rate": 1.959079345692827e-07, + "loss": 0.516, + "step": 12870 + }, + { + "epoch": 0.91, + "grad_norm": 1.9255250889411737, + "learning_rate": 1.9558953696368842e-07, + "loss": 0.4848, + "step": 12871 + }, + { + "epoch": 0.91, + "grad_norm": 1.825290333706671, + "learning_rate": 1.9527139314333475e-07, + "loss": 0.4924, + "step": 12872 + }, + { + "epoch": 0.91, + "grad_norm": 2.2073369212312355, + "learning_rate": 1.9495350312502716e-07, + "loss": 0.4991, + "step": 12873 + }, + { + "epoch": 0.91, + "grad_norm": 1.4814136243599614, + "learning_rate": 1.946358669255566e-07, + "loss": 0.4935, + "step": 12874 + }, + { + "epoch": 0.91, + "grad_norm": 1.5403339762887331, + "learning_rate": 1.9431848456170242e-07, + "loss": 0.5147, + "step": 12875 + }, + { + "epoch": 0.91, + "grad_norm": 1.8254228369622763, + "learning_rate": 1.9400135605023073e-07, + "loss": 0.502, + "step": 12876 + }, + { + "epoch": 0.91, + "grad_norm": 0.7370825183249379, + "learning_rate": 1.9368448140789142e-07, + "loss": 0.4184, + "step": 12877 + }, + { + "epoch": 0.91, + "grad_norm": 1.7317346011737067, + "learning_rate": 1.9336786065142388e-07, + "loss": 0.528, + "step": 12878 + }, + { + "epoch": 0.91, + "grad_norm": 1.7917666993201937, + "learning_rate": 1.930514937975536e-07, + "loss": 0.5485, + "step": 12879 + }, + { + "epoch": 0.91, + "grad_norm": 1.8987064590859657, + "learning_rate": 1.9273538086299114e-07, + "loss": 0.5288, + "step": 12880 + }, + { + "epoch": 0.91, + "grad_norm": 2.309789541342419, + "learning_rate": 1.9241952186443535e-07, + "loss": 0.5374, + "step": 12881 + }, + { + "epoch": 0.91, + "grad_norm": 1.492034210007121, + "learning_rate": 1.921039168185701e-07, + "loss": 0.4767, + "step": 12882 + }, + { + "epoch": 0.91, + "grad_norm": 1.947001735988435, + "learning_rate": 1.9178856574206816e-07, + "loss": 0.4908, + "step": 12883 + }, + { + "epoch": 0.91, + "grad_norm": 2.2440130057016443, + "learning_rate": 1.9147346865158622e-07, + "loss": 0.5688, + "step": 12884 + }, + { + "epoch": 0.91, + "grad_norm": 2.321784957433587, + "learning_rate": 1.911586255637693e-07, + "loss": 0.5688, + "step": 12885 + }, + { + "epoch": 0.91, + "grad_norm": 1.8446689471314754, + "learning_rate": 1.9084403649524797e-07, + "loss": 0.5051, + "step": 12886 + }, + { + "epoch": 0.91, + "grad_norm": 1.5891090255506783, + "learning_rate": 1.905297014626406e-07, + "loss": 0.5222, + "step": 12887 + }, + { + "epoch": 0.91, + "grad_norm": 1.6392731513830539, + "learning_rate": 1.9021562048255116e-07, + "loss": 0.5712, + "step": 12888 + }, + { + "epoch": 0.91, + "grad_norm": 1.742516955596712, + "learning_rate": 1.8990179357156967e-07, + "loss": 0.5235, + "step": 12889 + }, + { + "epoch": 0.91, + "grad_norm": 2.126402558134192, + "learning_rate": 1.8958822074627514e-07, + "loss": 0.5827, + "step": 12890 + }, + { + "epoch": 0.91, + "grad_norm": 1.8140401783010942, + "learning_rate": 1.892749020232304e-07, + "loss": 0.5612, + "step": 12891 + }, + { + "epoch": 0.91, + "grad_norm": 1.886235217543352, + "learning_rate": 1.8896183741898722e-07, + "loss": 0.5316, + "step": 12892 + }, + { + "epoch": 0.91, + "grad_norm": 1.426671418592252, + "learning_rate": 1.8864902695008014e-07, + "loss": 0.4736, + "step": 12893 + }, + { + "epoch": 0.91, + "grad_norm": 1.5476836972174197, + "learning_rate": 1.8833647063303596e-07, + "loss": 0.4952, + "step": 12894 + }, + { + "epoch": 0.92, + "grad_norm": 1.8523745506733946, + "learning_rate": 1.8802416848436255e-07, + "loss": 0.5011, + "step": 12895 + }, + { + "epoch": 0.92, + "grad_norm": 2.143212631942806, + "learning_rate": 1.8771212052055844e-07, + "loss": 0.5122, + "step": 12896 + }, + { + "epoch": 0.92, + "grad_norm": 1.6903537676124054, + "learning_rate": 1.8740032675810594e-07, + "loss": 0.5643, + "step": 12897 + }, + { + "epoch": 0.92, + "grad_norm": 1.5930889272638418, + "learning_rate": 1.8708878721347524e-07, + "loss": 0.5283, + "step": 12898 + }, + { + "epoch": 0.92, + "grad_norm": 2.0722536460301124, + "learning_rate": 1.8677750190312426e-07, + "loss": 0.5421, + "step": 12899 + }, + { + "epoch": 0.92, + "grad_norm": 3.7060014090443762, + "learning_rate": 1.8646647084349434e-07, + "loss": 0.552, + "step": 12900 + }, + { + "epoch": 0.92, + "grad_norm": 1.6882200546782162, + "learning_rate": 1.8615569405101562e-07, + "loss": 0.5223, + "step": 12901 + }, + { + "epoch": 0.92, + "grad_norm": 1.969404544030653, + "learning_rate": 1.8584517154210502e-07, + "loss": 0.5157, + "step": 12902 + }, + { + "epoch": 0.92, + "grad_norm": 2.5291444453738237, + "learning_rate": 1.8553490333316492e-07, + "loss": 0.5181, + "step": 12903 + }, + { + "epoch": 0.92, + "grad_norm": 0.7739207126553875, + "learning_rate": 1.8522488944058502e-07, + "loss": 0.4127, + "step": 12904 + }, + { + "epoch": 0.92, + "grad_norm": 1.6170715476806818, + "learning_rate": 1.8491512988074057e-07, + "loss": 0.4686, + "step": 12905 + }, + { + "epoch": 0.92, + "grad_norm": 1.6434258864581324, + "learning_rate": 1.846056246699962e-07, + "loss": 0.4472, + "step": 12906 + }, + { + "epoch": 0.92, + "grad_norm": 1.7900514790010746, + "learning_rate": 1.8429637382469833e-07, + "loss": 0.4399, + "step": 12907 + }, + { + "epoch": 0.92, + "grad_norm": 1.9938809040544891, + "learning_rate": 1.8398737736118388e-07, + "loss": 0.5303, + "step": 12908 + }, + { + "epoch": 0.92, + "grad_norm": 4.719382279773229, + "learning_rate": 1.8367863529577479e-07, + "loss": 0.501, + "step": 12909 + }, + { + "epoch": 0.92, + "grad_norm": 1.524916660947038, + "learning_rate": 1.8337014764478079e-07, + "loss": 0.5738, + "step": 12910 + }, + { + "epoch": 0.92, + "grad_norm": 1.6391613195997803, + "learning_rate": 1.830619144244966e-07, + "loss": 0.505, + "step": 12911 + }, + { + "epoch": 0.92, + "grad_norm": 1.955297617854535, + "learning_rate": 1.8275393565120314e-07, + "loss": 0.5179, + "step": 12912 + }, + { + "epoch": 0.92, + "grad_norm": 0.7258159023083258, + "learning_rate": 1.824462113411707e-07, + "loss": 0.4318, + "step": 12913 + }, + { + "epoch": 0.92, + "grad_norm": 1.7253783274081733, + "learning_rate": 1.8213874151065348e-07, + "loss": 0.4779, + "step": 12914 + }, + { + "epoch": 0.92, + "grad_norm": 2.1495900239906627, + "learning_rate": 1.8183152617589294e-07, + "loss": 0.5049, + "step": 12915 + }, + { + "epoch": 0.92, + "grad_norm": 0.7018904727112434, + "learning_rate": 1.815245653531167e-07, + "loss": 0.4137, + "step": 12916 + }, + { + "epoch": 0.92, + "grad_norm": 1.593477688342379, + "learning_rate": 1.812178590585406e-07, + "loss": 0.6069, + "step": 12917 + }, + { + "epoch": 0.92, + "grad_norm": 1.5142734110805667, + "learning_rate": 1.809114073083662e-07, + "loss": 0.4079, + "step": 12918 + }, + { + "epoch": 0.92, + "grad_norm": 8.17440356412819, + "learning_rate": 1.8060521011877995e-07, + "loss": 0.5545, + "step": 12919 + }, + { + "epoch": 0.92, + "grad_norm": 1.8616406108413817, + "learning_rate": 1.8029926750595672e-07, + "loss": 0.5456, + "step": 12920 + }, + { + "epoch": 0.92, + "grad_norm": 1.5320606552668217, + "learning_rate": 1.7999357948605744e-07, + "loss": 0.5358, + "step": 12921 + }, + { + "epoch": 0.92, + "grad_norm": 1.7104067710413295, + "learning_rate": 1.7968814607523033e-07, + "loss": 0.5268, + "step": 12922 + }, + { + "epoch": 0.92, + "grad_norm": 1.8634205651369784, + "learning_rate": 1.7938296728960803e-07, + "loss": 0.5282, + "step": 12923 + }, + { + "epoch": 0.92, + "grad_norm": 1.7831057782822592, + "learning_rate": 1.7907804314531264e-07, + "loss": 0.5218, + "step": 12924 + }, + { + "epoch": 0.92, + "grad_norm": 1.4761489014103117, + "learning_rate": 1.7877337365845015e-07, + "loss": 0.5288, + "step": 12925 + }, + { + "epoch": 0.92, + "grad_norm": 1.5177750692583039, + "learning_rate": 1.784689588451144e-07, + "loss": 0.5133, + "step": 12926 + }, + { + "epoch": 0.92, + "grad_norm": 0.7070074477479356, + "learning_rate": 1.7816479872138582e-07, + "loss": 0.4217, + "step": 12927 + }, + { + "epoch": 0.92, + "grad_norm": 1.859807336449304, + "learning_rate": 1.77860893303331e-07, + "loss": 0.5878, + "step": 12928 + }, + { + "epoch": 0.92, + "grad_norm": 1.60485492447602, + "learning_rate": 1.7755724260700436e-07, + "loss": 0.4781, + "step": 12929 + }, + { + "epoch": 0.92, + "grad_norm": 1.7057507442705977, + "learning_rate": 1.7725384664844414e-07, + "loss": 0.5018, + "step": 12930 + }, + { + "epoch": 0.92, + "grad_norm": 0.7617218157424617, + "learning_rate": 1.7695070544367755e-07, + "loss": 0.4355, + "step": 12931 + }, + { + "epoch": 0.92, + "grad_norm": 1.9795185853486468, + "learning_rate": 1.766478190087173e-07, + "loss": 0.5105, + "step": 12932 + }, + { + "epoch": 0.92, + "grad_norm": 2.0044915010283266, + "learning_rate": 1.7634518735956342e-07, + "loss": 0.5381, + "step": 12933 + }, + { + "epoch": 0.92, + "grad_norm": 2.380680709333662, + "learning_rate": 1.760428105122014e-07, + "loss": 0.5169, + "step": 12934 + }, + { + "epoch": 0.92, + "grad_norm": 0.7450506881748511, + "learning_rate": 1.7574068848260294e-07, + "loss": 0.4219, + "step": 12935 + }, + { + "epoch": 0.92, + "grad_norm": 1.6672872785091464, + "learning_rate": 1.7543882128672973e-07, + "loss": 0.5387, + "step": 12936 + }, + { + "epoch": 0.92, + "grad_norm": 2.72782501872599, + "learning_rate": 1.751372089405251e-07, + "loss": 0.5131, + "step": 12937 + }, + { + "epoch": 0.92, + "grad_norm": 2.1108828035044804, + "learning_rate": 1.7483585145992132e-07, + "loss": 0.5521, + "step": 12938 + }, + { + "epoch": 0.92, + "grad_norm": 2.0062488267929224, + "learning_rate": 1.7453474886083843e-07, + "loss": 0.5304, + "step": 12939 + }, + { + "epoch": 0.92, + "grad_norm": 2.053886032685585, + "learning_rate": 1.7423390115918092e-07, + "loss": 0.5057, + "step": 12940 + }, + { + "epoch": 0.92, + "grad_norm": 1.7928015381275821, + "learning_rate": 1.7393330837084111e-07, + "loss": 0.5631, + "step": 12941 + }, + { + "epoch": 0.92, + "grad_norm": 0.783353010626, + "learning_rate": 1.736329705116957e-07, + "loss": 0.4148, + "step": 12942 + }, + { + "epoch": 0.92, + "grad_norm": 1.6023737831798517, + "learning_rate": 1.7333288759761202e-07, + "loss": 0.5371, + "step": 12943 + }, + { + "epoch": 0.92, + "grad_norm": 0.6379437929166991, + "learning_rate": 1.7303305964443962e-07, + "loss": 0.3711, + "step": 12944 + }, + { + "epoch": 0.92, + "grad_norm": 2.5684705795978933, + "learning_rate": 1.7273348666801693e-07, + "loss": 0.5627, + "step": 12945 + }, + { + "epoch": 0.92, + "grad_norm": 3.0498306854532005, + "learning_rate": 1.7243416868416852e-07, + "loss": 0.5087, + "step": 12946 + }, + { + "epoch": 0.92, + "grad_norm": 2.9834239020676803, + "learning_rate": 1.7213510570870562e-07, + "loss": 0.5206, + "step": 12947 + }, + { + "epoch": 0.92, + "grad_norm": 1.7965586393635196, + "learning_rate": 1.7183629775742562e-07, + "loss": 0.5905, + "step": 12948 + }, + { + "epoch": 0.92, + "grad_norm": 1.6256688813520603, + "learning_rate": 1.7153774484611197e-07, + "loss": 0.4501, + "step": 12949 + }, + { + "epoch": 0.92, + "grad_norm": 1.6445550447691175, + "learning_rate": 1.7123944699053596e-07, + "loss": 0.4924, + "step": 12950 + }, + { + "epoch": 0.92, + "grad_norm": 2.174917378145778, + "learning_rate": 1.7094140420645444e-07, + "loss": 0.5538, + "step": 12951 + }, + { + "epoch": 0.92, + "grad_norm": 1.872499079068012, + "learning_rate": 1.7064361650961093e-07, + "loss": 0.5719, + "step": 12952 + }, + { + "epoch": 0.92, + "grad_norm": 1.908903810145752, + "learning_rate": 1.7034608391573504e-07, + "loss": 0.5265, + "step": 12953 + }, + { + "epoch": 0.92, + "grad_norm": 1.802029100765159, + "learning_rate": 1.7004880644054533e-07, + "loss": 0.5364, + "step": 12954 + }, + { + "epoch": 0.92, + "grad_norm": 1.6602819759523981, + "learning_rate": 1.6975178409974314e-07, + "loss": 0.5229, + "step": 12955 + }, + { + "epoch": 0.92, + "grad_norm": 1.7913793450454798, + "learning_rate": 1.6945501690901867e-07, + "loss": 0.5196, + "step": 12956 + }, + { + "epoch": 0.92, + "grad_norm": 1.5727910401852632, + "learning_rate": 1.6915850488404883e-07, + "loss": 0.4425, + "step": 12957 + }, + { + "epoch": 0.92, + "grad_norm": 0.6914953083213943, + "learning_rate": 1.68862248040495e-07, + "loss": 0.4182, + "step": 12958 + }, + { + "epoch": 0.92, + "grad_norm": 1.6919504137537773, + "learning_rate": 1.6856624639400854e-07, + "loss": 0.5266, + "step": 12959 + }, + { + "epoch": 0.92, + "grad_norm": 1.8457808651664118, + "learning_rate": 1.682704999602236e-07, + "loss": 0.5596, + "step": 12960 + }, + { + "epoch": 0.92, + "grad_norm": 1.5418337502244523, + "learning_rate": 1.6797500875476268e-07, + "loss": 0.5586, + "step": 12961 + }, + { + "epoch": 0.92, + "grad_norm": 1.6667032756442683, + "learning_rate": 1.67679772793235e-07, + "loss": 0.5379, + "step": 12962 + }, + { + "epoch": 0.92, + "grad_norm": 1.7763069559378224, + "learning_rate": 1.6738479209123582e-07, + "loss": 0.5005, + "step": 12963 + }, + { + "epoch": 0.92, + "grad_norm": 1.8017231985404945, + "learning_rate": 1.6709006666434768e-07, + "loss": 0.5237, + "step": 12964 + }, + { + "epoch": 0.92, + "grad_norm": 0.688322929159905, + "learning_rate": 1.6679559652813759e-07, + "loss": 0.4285, + "step": 12965 + }, + { + "epoch": 0.92, + "grad_norm": 1.6054994581745439, + "learning_rate": 1.6650138169816198e-07, + "loss": 0.4827, + "step": 12966 + }, + { + "epoch": 0.92, + "grad_norm": 2.6461681980305376, + "learning_rate": 1.6620742218996066e-07, + "loss": 0.5176, + "step": 12967 + }, + { + "epoch": 0.92, + "grad_norm": 1.7298947221846763, + "learning_rate": 1.6591371801906287e-07, + "loss": 0.492, + "step": 12968 + }, + { + "epoch": 0.92, + "grad_norm": 1.9482126102008412, + "learning_rate": 1.6562026920098228e-07, + "loss": 0.4801, + "step": 12969 + }, + { + "epoch": 0.92, + "grad_norm": 0.6744132220614073, + "learning_rate": 1.6532707575122043e-07, + "loss": 0.4325, + "step": 12970 + }, + { + "epoch": 0.92, + "grad_norm": 1.6152341087048243, + "learning_rate": 1.650341376852649e-07, + "loss": 0.5397, + "step": 12971 + }, + { + "epoch": 0.92, + "grad_norm": 0.6641516706584203, + "learning_rate": 1.6474145501858884e-07, + "loss": 0.4264, + "step": 12972 + }, + { + "epoch": 0.92, + "grad_norm": 1.7693987954550874, + "learning_rate": 1.6444902776665385e-07, + "loss": 0.5231, + "step": 12973 + }, + { + "epoch": 0.92, + "grad_norm": 1.8103811375228294, + "learning_rate": 1.6415685594490526e-07, + "loss": 0.5786, + "step": 12974 + }, + { + "epoch": 0.92, + "grad_norm": 1.7824358921651722, + "learning_rate": 1.6386493956877858e-07, + "loss": 0.477, + "step": 12975 + }, + { + "epoch": 0.92, + "grad_norm": 1.7230891729480233, + "learning_rate": 1.63573278653692e-07, + "loss": 0.5816, + "step": 12976 + }, + { + "epoch": 0.92, + "grad_norm": 2.0074024522536655, + "learning_rate": 1.6328187321505318e-07, + "loss": 0.554, + "step": 12977 + }, + { + "epoch": 0.92, + "grad_norm": 2.0822021630381795, + "learning_rate": 1.6299072326825592e-07, + "loss": 0.6178, + "step": 12978 + }, + { + "epoch": 0.92, + "grad_norm": 1.5767552778138463, + "learning_rate": 1.626998288286774e-07, + "loss": 0.5159, + "step": 12979 + }, + { + "epoch": 0.92, + "grad_norm": 2.5488718119615865, + "learning_rate": 1.624091899116853e-07, + "loss": 0.5089, + "step": 12980 + }, + { + "epoch": 0.92, + "grad_norm": 1.5226089081896337, + "learning_rate": 1.621188065326318e-07, + "loss": 0.5701, + "step": 12981 + }, + { + "epoch": 0.92, + "grad_norm": 1.8386362128201983, + "learning_rate": 1.6182867870685626e-07, + "loss": 0.5297, + "step": 12982 + }, + { + "epoch": 0.92, + "grad_norm": 1.5817653038690747, + "learning_rate": 1.6153880644968366e-07, + "loss": 0.4617, + "step": 12983 + }, + { + "epoch": 0.92, + "grad_norm": 1.7132260715716927, + "learning_rate": 1.6124918977642512e-07, + "loss": 0.5314, + "step": 12984 + }, + { + "epoch": 0.92, + "grad_norm": 1.5375403619073302, + "learning_rate": 1.6095982870238168e-07, + "loss": 0.4922, + "step": 12985 + }, + { + "epoch": 0.92, + "grad_norm": 1.5629476524544272, + "learning_rate": 1.6067072324283672e-07, + "loss": 0.4685, + "step": 12986 + }, + { + "epoch": 0.92, + "grad_norm": 1.7658817587709719, + "learning_rate": 1.603818734130619e-07, + "loss": 0.5544, + "step": 12987 + }, + { + "epoch": 0.92, + "grad_norm": 1.7313781386637763, + "learning_rate": 1.6009327922831552e-07, + "loss": 0.5026, + "step": 12988 + }, + { + "epoch": 0.92, + "grad_norm": 1.7609404349592683, + "learning_rate": 1.5980494070384212e-07, + "loss": 0.5699, + "step": 12989 + }, + { + "epoch": 0.92, + "grad_norm": 1.7286487279570024, + "learning_rate": 1.5951685785487226e-07, + "loss": 0.5565, + "step": 12990 + }, + { + "epoch": 0.92, + "grad_norm": 1.7080096126514979, + "learning_rate": 1.5922903069662378e-07, + "loss": 0.5682, + "step": 12991 + }, + { + "epoch": 0.92, + "grad_norm": 2.383334416297703, + "learning_rate": 1.589414592443006e-07, + "loss": 0.4827, + "step": 12992 + }, + { + "epoch": 0.92, + "grad_norm": 0.7022119367114747, + "learning_rate": 1.5865414351309339e-07, + "loss": 0.4336, + "step": 12993 + }, + { + "epoch": 0.92, + "grad_norm": 1.6622722116674689, + "learning_rate": 1.583670835181794e-07, + "loss": 0.4744, + "step": 12994 + }, + { + "epoch": 0.92, + "grad_norm": 2.0537273027663328, + "learning_rate": 1.5808027927472103e-07, + "loss": 0.5824, + "step": 12995 + }, + { + "epoch": 0.92, + "grad_norm": 1.4674304776984295, + "learning_rate": 1.5779373079786998e-07, + "loss": 0.477, + "step": 12996 + }, + { + "epoch": 0.92, + "grad_norm": 1.8495907972420185, + "learning_rate": 1.575074381027608e-07, + "loss": 0.5643, + "step": 12997 + }, + { + "epoch": 0.92, + "grad_norm": 1.7132091901153577, + "learning_rate": 1.5722140120451756e-07, + "loss": 0.4618, + "step": 12998 + }, + { + "epoch": 0.92, + "grad_norm": 1.865561935712061, + "learning_rate": 1.5693562011824926e-07, + "loss": 0.5545, + "step": 12999 + }, + { + "epoch": 0.92, + "grad_norm": 1.6767901040331972, + "learning_rate": 1.5665009485905215e-07, + "loss": 0.4746, + "step": 13000 + }, + { + "epoch": 0.92, + "grad_norm": 1.6671333648425857, + "learning_rate": 1.5636482544200915e-07, + "loss": 0.475, + "step": 13001 + }, + { + "epoch": 0.92, + "grad_norm": 1.6393118670633124, + "learning_rate": 1.5607981188218768e-07, + "loss": 0.5236, + "step": 13002 + }, + { + "epoch": 0.92, + "grad_norm": 1.9246373500097729, + "learning_rate": 1.5579505419464514e-07, + "loss": 0.5057, + "step": 13003 + }, + { + "epoch": 0.92, + "grad_norm": 1.7020835671781742, + "learning_rate": 1.5551055239442114e-07, + "loss": 0.4826, + "step": 13004 + }, + { + "epoch": 0.92, + "grad_norm": 1.4714402837846494, + "learning_rate": 1.5522630649654537e-07, + "loss": 0.4459, + "step": 13005 + }, + { + "epoch": 0.92, + "grad_norm": 1.6765312525052818, + "learning_rate": 1.5494231651603242e-07, + "loss": 0.4729, + "step": 13006 + }, + { + "epoch": 0.92, + "grad_norm": 2.3494689403228066, + "learning_rate": 1.546585824678837e-07, + "loss": 0.5193, + "step": 13007 + }, + { + "epoch": 0.92, + "grad_norm": 1.4856063160945567, + "learning_rate": 1.5437510436708768e-07, + "loss": 0.4676, + "step": 13008 + }, + { + "epoch": 0.92, + "grad_norm": 2.023093565145505, + "learning_rate": 1.5409188222861694e-07, + "loss": 0.5906, + "step": 13009 + }, + { + "epoch": 0.92, + "grad_norm": 2.2131889042579553, + "learning_rate": 1.538089160674333e-07, + "loss": 0.5314, + "step": 13010 + }, + { + "epoch": 0.92, + "grad_norm": 0.6343516224210075, + "learning_rate": 1.5352620589848376e-07, + "loss": 0.3964, + "step": 13011 + }, + { + "epoch": 0.92, + "grad_norm": 1.7721008873206334, + "learning_rate": 1.5324375173670247e-07, + "loss": 0.5245, + "step": 13012 + }, + { + "epoch": 0.92, + "grad_norm": 1.845964985329556, + "learning_rate": 1.5296155359700972e-07, + "loss": 0.5608, + "step": 13013 + }, + { + "epoch": 0.92, + "grad_norm": 1.6836798460190794, + "learning_rate": 1.526796114943102e-07, + "loss": 0.4429, + "step": 13014 + }, + { + "epoch": 0.92, + "grad_norm": 1.7116603300412874, + "learning_rate": 1.5239792544349984e-07, + "loss": 0.5798, + "step": 13015 + }, + { + "epoch": 0.92, + "grad_norm": 1.7006079890203314, + "learning_rate": 1.5211649545945672e-07, + "loss": 0.5197, + "step": 13016 + }, + { + "epoch": 0.92, + "grad_norm": 1.7326668589559964, + "learning_rate": 1.5183532155704673e-07, + "loss": 0.4816, + "step": 13017 + }, + { + "epoch": 0.92, + "grad_norm": 1.8070746475993482, + "learning_rate": 1.5155440375112297e-07, + "loss": 0.4856, + "step": 13018 + }, + { + "epoch": 0.92, + "grad_norm": 2.0171448276874058, + "learning_rate": 1.5127374205652523e-07, + "loss": 0.5531, + "step": 13019 + }, + { + "epoch": 0.92, + "grad_norm": 1.9334040531277459, + "learning_rate": 1.5099333648807723e-07, + "loss": 0.5282, + "step": 13020 + }, + { + "epoch": 0.92, + "grad_norm": 1.5708952461251648, + "learning_rate": 1.5071318706059157e-07, + "loss": 0.5379, + "step": 13021 + }, + { + "epoch": 0.92, + "grad_norm": 1.7870386557785074, + "learning_rate": 1.5043329378886805e-07, + "loss": 0.4584, + "step": 13022 + }, + { + "epoch": 0.92, + "grad_norm": 1.8744683596755989, + "learning_rate": 1.5015365668769044e-07, + "loss": 0.5765, + "step": 13023 + }, + { + "epoch": 0.92, + "grad_norm": 1.6658297434180285, + "learning_rate": 1.498742757718302e-07, + "loss": 0.5002, + "step": 13024 + }, + { + "epoch": 0.92, + "grad_norm": 2.337214101702783, + "learning_rate": 1.4959515105604394e-07, + "loss": 0.5541, + "step": 13025 + }, + { + "epoch": 0.92, + "grad_norm": 1.718442721079696, + "learning_rate": 1.493162825550787e-07, + "loss": 0.519, + "step": 13026 + }, + { + "epoch": 0.92, + "grad_norm": 1.7212674390774225, + "learning_rate": 1.4903767028366322e-07, + "loss": 0.5096, + "step": 13027 + }, + { + "epoch": 0.92, + "grad_norm": 1.6180712900647878, + "learning_rate": 1.4875931425651524e-07, + "loss": 0.5082, + "step": 13028 + }, + { + "epoch": 0.92, + "grad_norm": 1.649319677550335, + "learning_rate": 1.4848121448833853e-07, + "loss": 0.5507, + "step": 13029 + }, + { + "epoch": 0.92, + "grad_norm": 1.8671984204542902, + "learning_rate": 1.4820337099382298e-07, + "loss": 0.5955, + "step": 13030 + }, + { + "epoch": 0.92, + "grad_norm": 1.6008257395326015, + "learning_rate": 1.4792578378764633e-07, + "loss": 0.4505, + "step": 13031 + }, + { + "epoch": 0.92, + "grad_norm": 1.8350178688823395, + "learning_rate": 1.476484528844696e-07, + "loss": 0.5242, + "step": 13032 + }, + { + "epoch": 0.92, + "grad_norm": 1.733489744176243, + "learning_rate": 1.4737137829894498e-07, + "loss": 0.5331, + "step": 13033 + }, + { + "epoch": 0.92, + "grad_norm": 1.6559441700619066, + "learning_rate": 1.4709456004570632e-07, + "loss": 0.5728, + "step": 13034 + }, + { + "epoch": 0.92, + "grad_norm": 1.737022618526114, + "learning_rate": 1.4681799813937692e-07, + "loss": 0.5684, + "step": 13035 + }, + { + "epoch": 0.93, + "grad_norm": 1.5818804199599092, + "learning_rate": 1.4654169259456563e-07, + "loss": 0.5209, + "step": 13036 + }, + { + "epoch": 0.93, + "grad_norm": 1.6304304279246429, + "learning_rate": 1.4626564342586802e-07, + "loss": 0.4715, + "step": 13037 + }, + { + "epoch": 0.93, + "grad_norm": 1.7198812345857497, + "learning_rate": 1.4598985064786632e-07, + "loss": 0.5238, + "step": 13038 + }, + { + "epoch": 0.93, + "grad_norm": 1.5568735909496947, + "learning_rate": 1.4571431427512771e-07, + "loss": 0.5053, + "step": 13039 + }, + { + "epoch": 0.93, + "grad_norm": 2.129023849986105, + "learning_rate": 1.4543903432220784e-07, + "loss": 0.5162, + "step": 13040 + }, + { + "epoch": 0.93, + "grad_norm": 1.7686448822993477, + "learning_rate": 1.4516401080364728e-07, + "loss": 0.5531, + "step": 13041 + }, + { + "epoch": 0.93, + "grad_norm": 1.6959646897344067, + "learning_rate": 1.448892437339744e-07, + "loss": 0.518, + "step": 13042 + }, + { + "epoch": 0.93, + "grad_norm": 1.8945615544536747, + "learning_rate": 1.4461473312770368e-07, + "loss": 0.5222, + "step": 13043 + }, + { + "epoch": 0.93, + "grad_norm": 1.584443791967715, + "learning_rate": 1.4434047899933357e-07, + "loss": 0.484, + "step": 13044 + }, + { + "epoch": 0.93, + "grad_norm": 1.6515760411101494, + "learning_rate": 1.4406648136335412e-07, + "loss": 0.4753, + "step": 13045 + }, + { + "epoch": 0.93, + "grad_norm": 1.5330660454752978, + "learning_rate": 1.437927402342365e-07, + "loss": 0.479, + "step": 13046 + }, + { + "epoch": 0.93, + "grad_norm": 1.6520645678223127, + "learning_rate": 1.4351925562644143e-07, + "loss": 0.5549, + "step": 13047 + }, + { + "epoch": 0.93, + "grad_norm": 1.6327099860829077, + "learning_rate": 1.4324602755441507e-07, + "loss": 0.505, + "step": 13048 + }, + { + "epoch": 0.93, + "grad_norm": 1.6430968811586766, + "learning_rate": 1.4297305603259037e-07, + "loss": 0.5296, + "step": 13049 + }, + { + "epoch": 0.93, + "grad_norm": 2.0257047347751134, + "learning_rate": 1.4270034107538743e-07, + "loss": 0.6322, + "step": 13050 + }, + { + "epoch": 0.93, + "grad_norm": 2.013108784179735, + "learning_rate": 1.424278826972103e-07, + "loss": 0.5249, + "step": 13051 + }, + { + "epoch": 0.93, + "grad_norm": 1.8433861695789293, + "learning_rate": 1.4215568091245359e-07, + "loss": 0.5822, + "step": 13052 + }, + { + "epoch": 0.93, + "grad_norm": 1.5594359918863692, + "learning_rate": 1.4188373573549297e-07, + "loss": 0.4951, + "step": 13053 + }, + { + "epoch": 0.93, + "grad_norm": 2.0655393455338342, + "learning_rate": 1.4161204718069642e-07, + "loss": 0.5276, + "step": 13054 + }, + { + "epoch": 0.93, + "grad_norm": 1.727087456292135, + "learning_rate": 1.413406152624125e-07, + "loss": 0.5056, + "step": 13055 + }, + { + "epoch": 0.93, + "grad_norm": 1.9409173488756806, + "learning_rate": 1.4106943999498134e-07, + "loss": 0.5038, + "step": 13056 + }, + { + "epoch": 0.93, + "grad_norm": 2.0830462774232683, + "learning_rate": 1.4079852139272653e-07, + "loss": 0.5438, + "step": 13057 + }, + { + "epoch": 0.93, + "grad_norm": 1.720752273696097, + "learning_rate": 1.405278594699594e-07, + "loss": 0.5014, + "step": 13058 + }, + { + "epoch": 0.93, + "grad_norm": 1.6848732508131365, + "learning_rate": 1.4025745424097626e-07, + "loss": 0.559, + "step": 13059 + }, + { + "epoch": 0.93, + "grad_norm": 2.118371998408966, + "learning_rate": 1.399873057200618e-07, + "loss": 0.4875, + "step": 13060 + }, + { + "epoch": 0.93, + "grad_norm": 3.489592294769544, + "learning_rate": 1.397174139214863e-07, + "loss": 0.5375, + "step": 13061 + }, + { + "epoch": 0.93, + "grad_norm": 1.6580948634680197, + "learning_rate": 1.39447778859505e-07, + "loss": 0.5761, + "step": 13062 + }, + { + "epoch": 0.93, + "grad_norm": 1.7335255440823734, + "learning_rate": 1.3917840054836262e-07, + "loss": 0.5452, + "step": 13063 + }, + { + "epoch": 0.93, + "grad_norm": 1.7933710022876321, + "learning_rate": 1.3890927900228722e-07, + "loss": 0.6088, + "step": 13064 + }, + { + "epoch": 0.93, + "grad_norm": 1.711963779422511, + "learning_rate": 1.3864041423549525e-07, + "loss": 0.5512, + "step": 13065 + }, + { + "epoch": 0.93, + "grad_norm": 1.7395271176421412, + "learning_rate": 1.3837180626218915e-07, + "loss": 0.5794, + "step": 13066 + }, + { + "epoch": 0.93, + "grad_norm": 1.643545145278159, + "learning_rate": 1.381034550965582e-07, + "loss": 0.5621, + "step": 13067 + }, + { + "epoch": 0.93, + "grad_norm": 1.6572539590163193, + "learning_rate": 1.3783536075277716e-07, + "loss": 0.4477, + "step": 13068 + }, + { + "epoch": 0.93, + "grad_norm": 1.5479326231006578, + "learning_rate": 1.3756752324500745e-07, + "loss": 0.4436, + "step": 13069 + }, + { + "epoch": 0.93, + "grad_norm": 1.5025543120957576, + "learning_rate": 1.372999425873972e-07, + "loss": 0.5328, + "step": 13070 + }, + { + "epoch": 0.93, + "grad_norm": 1.6814224765009957, + "learning_rate": 1.370326187940807e-07, + "loss": 0.5517, + "step": 13071 + }, + { + "epoch": 0.93, + "grad_norm": 2.817323364321787, + "learning_rate": 1.367655518791794e-07, + "loss": 0.4577, + "step": 13072 + }, + { + "epoch": 0.93, + "grad_norm": 1.4750838518576463, + "learning_rate": 1.3649874185680146e-07, + "loss": 0.4706, + "step": 13073 + }, + { + "epoch": 0.93, + "grad_norm": 1.540199066560695, + "learning_rate": 1.3623218874103839e-07, + "loss": 0.5007, + "step": 13074 + }, + { + "epoch": 0.93, + "grad_norm": 0.6457275121594575, + "learning_rate": 1.3596589254597281e-07, + "loss": 0.3917, + "step": 13075 + }, + { + "epoch": 0.93, + "grad_norm": 1.818209867825898, + "learning_rate": 1.3569985328567015e-07, + "loss": 0.5297, + "step": 13076 + }, + { + "epoch": 0.93, + "grad_norm": 1.5166010493626751, + "learning_rate": 1.354340709741836e-07, + "loss": 0.4727, + "step": 13077 + }, + { + "epoch": 0.93, + "grad_norm": 1.7221167440258964, + "learning_rate": 1.351685456255525e-07, + "loss": 0.5726, + "step": 13078 + }, + { + "epoch": 0.93, + "grad_norm": 1.531053084822976, + "learning_rate": 1.3490327725380336e-07, + "loss": 0.4864, + "step": 13079 + }, + { + "epoch": 0.93, + "grad_norm": 1.7294173900272511, + "learning_rate": 1.3463826587294893e-07, + "loss": 0.5096, + "step": 13080 + }, + { + "epoch": 0.93, + "grad_norm": 1.737765610236196, + "learning_rate": 1.343735114969863e-07, + "loss": 0.5358, + "step": 13081 + }, + { + "epoch": 0.93, + "grad_norm": 1.553726035162499, + "learning_rate": 1.3410901413990319e-07, + "loss": 0.4798, + "step": 13082 + }, + { + "epoch": 0.93, + "grad_norm": 1.7022280947040254, + "learning_rate": 1.3384477381566895e-07, + "loss": 0.53, + "step": 13083 + }, + { + "epoch": 0.93, + "grad_norm": 1.7551485095536432, + "learning_rate": 1.3358079053824303e-07, + "loss": 0.5022, + "step": 13084 + }, + { + "epoch": 0.93, + "grad_norm": 1.8535672509979602, + "learning_rate": 1.3331706432156811e-07, + "loss": 0.5231, + "step": 13085 + }, + { + "epoch": 0.93, + "grad_norm": 1.5336442029713055, + "learning_rate": 1.3305359517957806e-07, + "loss": 0.4335, + "step": 13086 + }, + { + "epoch": 0.93, + "grad_norm": 1.598157428207112, + "learning_rate": 1.3279038312618787e-07, + "loss": 0.4749, + "step": 13087 + }, + { + "epoch": 0.93, + "grad_norm": 1.8120364155108648, + "learning_rate": 1.325274281753014e-07, + "loss": 0.5252, + "step": 13088 + }, + { + "epoch": 0.93, + "grad_norm": 1.8427128825035377, + "learning_rate": 1.3226473034081032e-07, + "loss": 0.4892, + "step": 13089 + }, + { + "epoch": 0.93, + "grad_norm": 1.8975669572540133, + "learning_rate": 1.3200228963658969e-07, + "loss": 0.561, + "step": 13090 + }, + { + "epoch": 0.93, + "grad_norm": 6.304275851382534, + "learning_rate": 1.3174010607650388e-07, + "loss": 0.5146, + "step": 13091 + }, + { + "epoch": 0.93, + "grad_norm": 1.6853084049805058, + "learning_rate": 1.3147817967440135e-07, + "loss": 0.5573, + "step": 13092 + }, + { + "epoch": 0.93, + "grad_norm": 1.6348634174268335, + "learning_rate": 1.3121651044411766e-07, + "loss": 0.5386, + "step": 13093 + }, + { + "epoch": 0.93, + "grad_norm": 1.6982856343962642, + "learning_rate": 1.3095509839947618e-07, + "loss": 0.6194, + "step": 13094 + }, + { + "epoch": 0.93, + "grad_norm": 1.8169435184716034, + "learning_rate": 1.3069394355428422e-07, + "loss": 0.4989, + "step": 13095 + }, + { + "epoch": 0.93, + "grad_norm": 1.6181642891386452, + "learning_rate": 1.304330459223385e-07, + "loss": 0.5606, + "step": 13096 + }, + { + "epoch": 0.93, + "grad_norm": 1.8013133583327057, + "learning_rate": 1.3017240551741918e-07, + "loss": 0.531, + "step": 13097 + }, + { + "epoch": 0.93, + "grad_norm": 1.8718321757105212, + "learning_rate": 1.2991202235329515e-07, + "loss": 0.5286, + "step": 13098 + }, + { + "epoch": 0.93, + "grad_norm": 1.5258082363787606, + "learning_rate": 1.2965189644371934e-07, + "loss": 0.4587, + "step": 13099 + }, + { + "epoch": 0.93, + "grad_norm": 2.575738718281483, + "learning_rate": 1.2939202780243353e-07, + "loss": 0.5333, + "step": 13100 + }, + { + "epoch": 0.93, + "grad_norm": 1.4574833079588099, + "learning_rate": 1.291324164431651e-07, + "loss": 0.469, + "step": 13101 + }, + { + "epoch": 0.93, + "grad_norm": 1.8599377560820922, + "learning_rate": 1.2887306237962694e-07, + "loss": 0.5451, + "step": 13102 + }, + { + "epoch": 0.93, + "grad_norm": 1.7914601733164435, + "learning_rate": 1.2861396562551974e-07, + "loss": 0.5749, + "step": 13103 + }, + { + "epoch": 0.93, + "grad_norm": 2.0777094060284287, + "learning_rate": 1.2835512619452873e-07, + "loss": 0.5475, + "step": 13104 + }, + { + "epoch": 0.93, + "grad_norm": 1.696790443675144, + "learning_rate": 1.280965441003279e-07, + "loss": 0.4663, + "step": 13105 + }, + { + "epoch": 0.93, + "grad_norm": 1.6115377858631736, + "learning_rate": 1.2783821935657526e-07, + "loss": 0.4983, + "step": 13106 + }, + { + "epoch": 0.93, + "grad_norm": 1.6822834303369707, + "learning_rate": 1.2758015197691709e-07, + "loss": 0.518, + "step": 13107 + }, + { + "epoch": 0.93, + "grad_norm": 1.792006926387145, + "learning_rate": 1.2732234197498582e-07, + "loss": 0.479, + "step": 13108 + }, + { + "epoch": 0.93, + "grad_norm": 1.5893516342800516, + "learning_rate": 1.2706478936439893e-07, + "loss": 0.5412, + "step": 13109 + }, + { + "epoch": 0.93, + "grad_norm": 1.426800813490248, + "learning_rate": 1.2680749415876214e-07, + "loss": 0.4469, + "step": 13110 + }, + { + "epoch": 0.93, + "grad_norm": 1.8734684237609165, + "learning_rate": 1.2655045637166574e-07, + "loss": 0.5302, + "step": 13111 + }, + { + "epoch": 0.93, + "grad_norm": 0.6738272762311658, + "learning_rate": 1.2629367601668774e-07, + "loss": 0.4148, + "step": 13112 + }, + { + "epoch": 0.93, + "grad_norm": 1.7062137750487198, + "learning_rate": 1.2603715310739174e-07, + "loss": 0.5036, + "step": 13113 + }, + { + "epoch": 0.93, + "grad_norm": 1.7881646548596797, + "learning_rate": 1.2578088765732964e-07, + "loss": 0.488, + "step": 13114 + }, + { + "epoch": 0.93, + "grad_norm": 1.6797741373226827, + "learning_rate": 1.2552487968003568e-07, + "loss": 0.4819, + "step": 13115 + }, + { + "epoch": 0.93, + "grad_norm": 1.6894196033551132, + "learning_rate": 1.2526912918903512e-07, + "loss": 0.5467, + "step": 13116 + }, + { + "epoch": 0.93, + "grad_norm": 1.9169292136333569, + "learning_rate": 1.250136361978377e-07, + "loss": 0.5154, + "step": 13117 + }, + { + "epoch": 0.93, + "grad_norm": 1.61196562998539, + "learning_rate": 1.2475840071993817e-07, + "loss": 0.472, + "step": 13118 + }, + { + "epoch": 0.93, + "grad_norm": 2.01850817310306, + "learning_rate": 1.2450342276881965e-07, + "loss": 0.5408, + "step": 13119 + }, + { + "epoch": 0.93, + "grad_norm": 1.6473311978392116, + "learning_rate": 1.2424870235795027e-07, + "loss": 0.5007, + "step": 13120 + }, + { + "epoch": 0.93, + "grad_norm": 1.534949153447226, + "learning_rate": 1.2399423950078704e-07, + "loss": 0.5446, + "step": 13121 + }, + { + "epoch": 0.93, + "grad_norm": 1.6405486443696216, + "learning_rate": 1.2374003421076918e-07, + "loss": 0.4591, + "step": 13122 + }, + { + "epoch": 0.93, + "grad_norm": 1.6499520361411846, + "learning_rate": 1.23486086501326e-07, + "loss": 0.5014, + "step": 13123 + }, + { + "epoch": 0.93, + "grad_norm": 1.7023345175831042, + "learning_rate": 1.2323239638587114e-07, + "loss": 0.5725, + "step": 13124 + }, + { + "epoch": 0.93, + "grad_norm": 0.6342419726149009, + "learning_rate": 1.2297896387780616e-07, + "loss": 0.3731, + "step": 13125 + }, + { + "epoch": 0.93, + "grad_norm": 1.7031899165624982, + "learning_rate": 1.2272578899051867e-07, + "loss": 0.4905, + "step": 13126 + }, + { + "epoch": 0.93, + "grad_norm": 1.6239537717422563, + "learning_rate": 1.2247287173738021e-07, + "loss": 0.4779, + "step": 13127 + }, + { + "epoch": 0.93, + "grad_norm": 1.565949404250858, + "learning_rate": 1.2222021213175284e-07, + "loss": 0.4494, + "step": 13128 + }, + { + "epoch": 0.93, + "grad_norm": 1.8263637750978845, + "learning_rate": 1.2196781018698146e-07, + "loss": 0.5697, + "step": 13129 + }, + { + "epoch": 0.93, + "grad_norm": 1.4128947497612738, + "learning_rate": 1.2171566591639982e-07, + "loss": 0.4575, + "step": 13130 + }, + { + "epoch": 0.93, + "grad_norm": 1.8939840990048236, + "learning_rate": 1.214637793333262e-07, + "loss": 0.5038, + "step": 13131 + }, + { + "epoch": 0.93, + "grad_norm": 1.857407273090847, + "learning_rate": 1.212121504510666e-07, + "loss": 0.5351, + "step": 13132 + }, + { + "epoch": 0.93, + "grad_norm": 1.5820624596837314, + "learning_rate": 1.2096077928291317e-07, + "loss": 0.5282, + "step": 13133 + }, + { + "epoch": 0.93, + "grad_norm": 1.6572627484708098, + "learning_rate": 1.2070966584214306e-07, + "loss": 0.555, + "step": 13134 + }, + { + "epoch": 0.93, + "grad_norm": 4.279583270568435, + "learning_rate": 1.2045881014202287e-07, + "loss": 0.5086, + "step": 13135 + }, + { + "epoch": 0.93, + "grad_norm": 1.7585715390152703, + "learning_rate": 1.2020821219580147e-07, + "loss": 0.4939, + "step": 13136 + }, + { + "epoch": 0.93, + "grad_norm": 1.6013342920723699, + "learning_rate": 1.199578720167177e-07, + "loss": 0.4879, + "step": 13137 + }, + { + "epoch": 0.93, + "grad_norm": 1.7368354117302693, + "learning_rate": 1.197077896179949e-07, + "loss": 0.6018, + "step": 13138 + }, + { + "epoch": 0.93, + "grad_norm": 1.955171683593444, + "learning_rate": 1.1945796501284358e-07, + "loss": 0.542, + "step": 13139 + }, + { + "epoch": 0.93, + "grad_norm": 1.9561572229997517, + "learning_rate": 1.1920839821445984e-07, + "loss": 0.4853, + "step": 13140 + }, + { + "epoch": 0.93, + "grad_norm": 1.6026857307758637, + "learning_rate": 1.1895908923602706e-07, + "loss": 0.5634, + "step": 13141 + }, + { + "epoch": 0.93, + "grad_norm": 1.5758273573583133, + "learning_rate": 1.1871003809071413e-07, + "loss": 0.5518, + "step": 13142 + }, + { + "epoch": 0.93, + "grad_norm": 1.6775533556240612, + "learning_rate": 1.184612447916772e-07, + "loss": 0.5188, + "step": 13143 + }, + { + "epoch": 0.93, + "grad_norm": 1.7694038347531653, + "learning_rate": 1.1821270935205797e-07, + "loss": 0.4856, + "step": 13144 + }, + { + "epoch": 0.93, + "grad_norm": 1.6027565909070323, + "learning_rate": 1.179644317849854e-07, + "loss": 0.5485, + "step": 13145 + }, + { + "epoch": 0.93, + "grad_norm": 1.5285317167668322, + "learning_rate": 1.1771641210357399e-07, + "loss": 0.4865, + "step": 13146 + }, + { + "epoch": 0.93, + "grad_norm": 1.488570737291413, + "learning_rate": 1.1746865032092548e-07, + "loss": 0.5158, + "step": 13147 + }, + { + "epoch": 0.93, + "grad_norm": 2.332779008516804, + "learning_rate": 1.1722114645012717e-07, + "loss": 0.5638, + "step": 13148 + }, + { + "epoch": 0.93, + "grad_norm": 2.523946852915129, + "learning_rate": 1.169739005042525e-07, + "loss": 0.5311, + "step": 13149 + }, + { + "epoch": 0.93, + "grad_norm": 3.5829279558453524, + "learning_rate": 1.1672691249636269e-07, + "loss": 0.5673, + "step": 13150 + }, + { + "epoch": 0.93, + "grad_norm": 1.9563332832021643, + "learning_rate": 1.1648018243950454e-07, + "loss": 0.5122, + "step": 13151 + }, + { + "epoch": 0.93, + "grad_norm": 1.8385638756270517, + "learning_rate": 1.1623371034671039e-07, + "loss": 0.5454, + "step": 13152 + }, + { + "epoch": 0.93, + "grad_norm": 1.7432544276344193, + "learning_rate": 1.1598749623099926e-07, + "loss": 0.5029, + "step": 13153 + }, + { + "epoch": 0.93, + "grad_norm": 1.7981484131174983, + "learning_rate": 1.1574154010537908e-07, + "loss": 0.5406, + "step": 13154 + }, + { + "epoch": 0.93, + "grad_norm": 1.6362569938572553, + "learning_rate": 1.1549584198284058e-07, + "loss": 0.501, + "step": 13155 + }, + { + "epoch": 0.93, + "grad_norm": 2.1954736501952956, + "learning_rate": 1.1525040187636283e-07, + "loss": 0.5487, + "step": 13156 + }, + { + "epoch": 0.93, + "grad_norm": 1.920972302088347, + "learning_rate": 1.1500521979890989e-07, + "loss": 0.5725, + "step": 13157 + }, + { + "epoch": 0.93, + "grad_norm": 2.041531314431294, + "learning_rate": 1.1476029576343473e-07, + "loss": 0.4694, + "step": 13158 + }, + { + "epoch": 0.93, + "grad_norm": 1.9773427262837928, + "learning_rate": 1.1451562978287368e-07, + "loss": 0.4899, + "step": 13159 + }, + { + "epoch": 0.93, + "grad_norm": 2.0246123715490754, + "learning_rate": 1.1427122187015194e-07, + "loss": 0.533, + "step": 13160 + }, + { + "epoch": 0.93, + "grad_norm": 1.8532688588907387, + "learning_rate": 1.1402707203817865e-07, + "loss": 0.5102, + "step": 13161 + }, + { + "epoch": 0.93, + "grad_norm": 1.6367604121184889, + "learning_rate": 1.1378318029985125e-07, + "loss": 0.564, + "step": 13162 + }, + { + "epoch": 0.93, + "grad_norm": 5.446211565019482, + "learning_rate": 1.135395466680539e-07, + "loss": 0.4454, + "step": 13163 + }, + { + "epoch": 0.93, + "grad_norm": 2.217608694537276, + "learning_rate": 1.1329617115565461e-07, + "loss": 0.5507, + "step": 13164 + }, + { + "epoch": 0.93, + "grad_norm": 1.8305696431866008, + "learning_rate": 1.1305305377551035e-07, + "loss": 0.502, + "step": 13165 + }, + { + "epoch": 0.93, + "grad_norm": 1.7683184842654862, + "learning_rate": 1.1281019454046249e-07, + "loss": 0.5331, + "step": 13166 + }, + { + "epoch": 0.93, + "grad_norm": 1.8013851487819206, + "learning_rate": 1.1256759346334078e-07, + "loss": 0.5089, + "step": 13167 + }, + { + "epoch": 0.93, + "grad_norm": 1.7448758768439352, + "learning_rate": 1.1232525055695942e-07, + "loss": 0.5539, + "step": 13168 + }, + { + "epoch": 0.93, + "grad_norm": 2.7688480595484135, + "learning_rate": 1.1208316583411983e-07, + "loss": 0.4582, + "step": 13169 + }, + { + "epoch": 0.93, + "grad_norm": 2.0185065368087485, + "learning_rate": 1.1184133930761066e-07, + "loss": 0.5218, + "step": 13170 + }, + { + "epoch": 0.93, + "grad_norm": 3.8818501127252585, + "learning_rate": 1.1159977099020447e-07, + "loss": 0.445, + "step": 13171 + }, + { + "epoch": 0.93, + "grad_norm": 1.851163219283377, + "learning_rate": 1.1135846089466273e-07, + "loss": 0.6096, + "step": 13172 + }, + { + "epoch": 0.93, + "grad_norm": 0.7119038612668248, + "learning_rate": 1.1111740903373247e-07, + "loss": 0.4026, + "step": 13173 + }, + { + "epoch": 0.93, + "grad_norm": 2.026616984199528, + "learning_rate": 1.1087661542014572e-07, + "loss": 0.5242, + "step": 13174 + }, + { + "epoch": 0.93, + "grad_norm": 1.6312344066019504, + "learning_rate": 1.1063608006662396e-07, + "loss": 0.506, + "step": 13175 + }, + { + "epoch": 0.93, + "grad_norm": 1.6071782358727515, + "learning_rate": 1.1039580298587038e-07, + "loss": 0.5356, + "step": 13176 + }, + { + "epoch": 0.94, + "grad_norm": 2.3896970562175013, + "learning_rate": 1.1015578419057981e-07, + "loss": 0.5647, + "step": 13177 + }, + { + "epoch": 0.94, + "grad_norm": 1.7586881291761984, + "learning_rate": 1.0991602369342935e-07, + "loss": 0.489, + "step": 13178 + }, + { + "epoch": 0.94, + "grad_norm": 1.552357694599565, + "learning_rate": 1.096765215070844e-07, + "loss": 0.5342, + "step": 13179 + }, + { + "epoch": 0.94, + "grad_norm": 1.4736974076713947, + "learning_rate": 1.0943727764419654e-07, + "loss": 0.4739, + "step": 13180 + }, + { + "epoch": 0.94, + "grad_norm": 1.5245124469287474, + "learning_rate": 1.0919829211740285e-07, + "loss": 0.4836, + "step": 13181 + }, + { + "epoch": 0.94, + "grad_norm": 1.650646123254438, + "learning_rate": 1.0895956493932825e-07, + "loss": 0.5209, + "step": 13182 + }, + { + "epoch": 0.94, + "grad_norm": 1.825913493686763, + "learning_rate": 1.0872109612258152e-07, + "loss": 0.5655, + "step": 13183 + }, + { + "epoch": 0.94, + "grad_norm": 1.6885782997955416, + "learning_rate": 1.0848288567976095e-07, + "loss": 0.5073, + "step": 13184 + }, + { + "epoch": 0.94, + "grad_norm": 1.6179252823491033, + "learning_rate": 1.0824493362344924e-07, + "loss": 0.4825, + "step": 13185 + }, + { + "epoch": 0.94, + "grad_norm": 1.7054783136312042, + "learning_rate": 1.0800723996621577e-07, + "loss": 0.56, + "step": 13186 + }, + { + "epoch": 0.94, + "grad_norm": 1.6334818530026622, + "learning_rate": 1.0776980472061549e-07, + "loss": 0.536, + "step": 13187 + }, + { + "epoch": 0.94, + "grad_norm": 1.6222781495632796, + "learning_rate": 1.0753262789919228e-07, + "loss": 0.5189, + "step": 13188 + }, + { + "epoch": 0.94, + "grad_norm": 1.681180530064635, + "learning_rate": 1.0729570951447276e-07, + "loss": 0.4409, + "step": 13189 + }, + { + "epoch": 0.94, + "grad_norm": 1.672149976906714, + "learning_rate": 1.0705904957897306e-07, + "loss": 0.4817, + "step": 13190 + }, + { + "epoch": 0.94, + "grad_norm": 1.8774081396217348, + "learning_rate": 1.0682264810519372e-07, + "loss": 0.4976, + "step": 13191 + }, + { + "epoch": 0.94, + "grad_norm": 2.4376795941643055, + "learning_rate": 1.0658650510562251e-07, + "loss": 0.5875, + "step": 13192 + }, + { + "epoch": 0.94, + "grad_norm": 2.045113512317086, + "learning_rate": 1.0635062059273338e-07, + "loss": 0.5676, + "step": 13193 + }, + { + "epoch": 0.94, + "grad_norm": 1.6933141906350917, + "learning_rate": 1.0611499457898577e-07, + "loss": 0.471, + "step": 13194 + }, + { + "epoch": 0.94, + "grad_norm": 1.6882715042453085, + "learning_rate": 1.0587962707682753e-07, + "loss": 0.4686, + "step": 13195 + }, + { + "epoch": 0.94, + "grad_norm": 3.0869017847599993, + "learning_rate": 1.0564451809869092e-07, + "loss": 0.5442, + "step": 13196 + }, + { + "epoch": 0.94, + "grad_norm": 1.685210001726867, + "learning_rate": 1.054096676569949e-07, + "loss": 0.5483, + "step": 13197 + }, + { + "epoch": 0.94, + "grad_norm": 0.7211416235464321, + "learning_rate": 1.0517507576414565e-07, + "loss": 0.3983, + "step": 13198 + }, + { + "epoch": 0.94, + "grad_norm": 1.5315092226876486, + "learning_rate": 1.0494074243253439e-07, + "loss": 0.5165, + "step": 13199 + }, + { + "epoch": 0.94, + "grad_norm": 1.5225681055774467, + "learning_rate": 1.0470666767454063e-07, + "loss": 0.4643, + "step": 13200 + }, + { + "epoch": 0.94, + "grad_norm": 1.825181770706918, + "learning_rate": 1.0447285150252784e-07, + "loss": 0.595, + "step": 13201 + }, + { + "epoch": 0.94, + "grad_norm": 1.6141646241871468, + "learning_rate": 1.0423929392884724e-07, + "loss": 0.5808, + "step": 13202 + }, + { + "epoch": 0.94, + "grad_norm": 1.7522258146657708, + "learning_rate": 1.0400599496583619e-07, + "loss": 0.4871, + "step": 13203 + }, + { + "epoch": 0.94, + "grad_norm": 2.5998027009521794, + "learning_rate": 1.0377295462581816e-07, + "loss": 0.5454, + "step": 13204 + }, + { + "epoch": 0.94, + "grad_norm": 1.674251663777696, + "learning_rate": 1.0354017292110385e-07, + "loss": 0.5317, + "step": 13205 + }, + { + "epoch": 0.94, + "grad_norm": 1.695856066472541, + "learning_rate": 1.0330764986398844e-07, + "loss": 0.5764, + "step": 13206 + }, + { + "epoch": 0.94, + "grad_norm": 1.7799558121192665, + "learning_rate": 1.0307538546675599e-07, + "loss": 0.5404, + "step": 13207 + }, + { + "epoch": 0.94, + "grad_norm": 2.7856162461635967, + "learning_rate": 1.0284337974167446e-07, + "loss": 0.4375, + "step": 13208 + }, + { + "epoch": 0.94, + "grad_norm": 1.873481232213926, + "learning_rate": 1.0261163270099905e-07, + "loss": 0.4645, + "step": 13209 + }, + { + "epoch": 0.94, + "grad_norm": 0.6883294051777229, + "learning_rate": 1.0238014435697219e-07, + "loss": 0.432, + "step": 13210 + }, + { + "epoch": 0.94, + "grad_norm": 0.6905730275908555, + "learning_rate": 1.0214891472182131e-07, + "loss": 0.3976, + "step": 13211 + }, + { + "epoch": 0.94, + "grad_norm": 1.689132259113071, + "learning_rate": 1.0191794380776166e-07, + "loss": 0.5878, + "step": 13212 + }, + { + "epoch": 0.94, + "grad_norm": 0.7159264484856204, + "learning_rate": 1.016872316269918e-07, + "loss": 0.4227, + "step": 13213 + }, + { + "epoch": 0.94, + "grad_norm": 1.4854025721230046, + "learning_rate": 1.014567781917014e-07, + "loss": 0.558, + "step": 13214 + }, + { + "epoch": 0.94, + "grad_norm": 1.5644879375662322, + "learning_rate": 1.012265835140619e-07, + "loss": 0.4481, + "step": 13215 + }, + { + "epoch": 0.94, + "grad_norm": 1.8360012626429543, + "learning_rate": 1.0099664760623407e-07, + "loss": 0.5144, + "step": 13216 + }, + { + "epoch": 0.94, + "grad_norm": 2.1959262740740444, + "learning_rate": 1.0076697048036266e-07, + "loss": 0.5215, + "step": 13217 + }, + { + "epoch": 0.94, + "grad_norm": 1.9547117499634035, + "learning_rate": 1.0053755214858129e-07, + "loss": 0.5408, + "step": 13218 + }, + { + "epoch": 0.94, + "grad_norm": 2.156349269935813, + "learning_rate": 1.0030839262300807e-07, + "loss": 0.5218, + "step": 13219 + }, + { + "epoch": 0.94, + "grad_norm": 1.882461647230272, + "learning_rate": 1.0007949191574717e-07, + "loss": 0.5249, + "step": 13220 + }, + { + "epoch": 0.94, + "grad_norm": 1.698588282893965, + "learning_rate": 9.985085003889173e-08, + "loss": 0.5448, + "step": 13221 + }, + { + "epoch": 0.94, + "grad_norm": 0.6635177838975698, + "learning_rate": 9.962246700451761e-08, + "loss": 0.392, + "step": 13222 + }, + { + "epoch": 0.94, + "grad_norm": 1.7308755506393345, + "learning_rate": 9.939434282469018e-08, + "loss": 0.5048, + "step": 13223 + }, + { + "epoch": 0.94, + "grad_norm": 1.6001245004397182, + "learning_rate": 9.916647751145869e-08, + "loss": 0.528, + "step": 13224 + }, + { + "epoch": 0.94, + "grad_norm": 2.6753705312184866, + "learning_rate": 9.893887107686017e-08, + "loss": 0.539, + "step": 13225 + }, + { + "epoch": 0.94, + "grad_norm": 1.6515335350470768, + "learning_rate": 9.871152353291724e-08, + "loss": 0.5072, + "step": 13226 + }, + { + "epoch": 0.94, + "grad_norm": 2.2142488468767705, + "learning_rate": 9.848443489163972e-08, + "loss": 0.4658, + "step": 13227 + }, + { + "epoch": 0.94, + "grad_norm": 1.5323670454336231, + "learning_rate": 9.825760516502302e-08, + "loss": 0.4948, + "step": 13228 + }, + { + "epoch": 0.94, + "grad_norm": 1.6623706467915622, + "learning_rate": 9.803103436504923e-08, + "loss": 0.5911, + "step": 13229 + }, + { + "epoch": 0.94, + "grad_norm": 1.6423071178462147, + "learning_rate": 9.780472250368656e-08, + "loss": 0.5101, + "step": 13230 + }, + { + "epoch": 0.94, + "grad_norm": 1.8516455389639768, + "learning_rate": 9.757866959288875e-08, + "loss": 0.5603, + "step": 13231 + }, + { + "epoch": 0.94, + "grad_norm": 1.8004678887841719, + "learning_rate": 9.735287564459739e-08, + "loss": 0.5066, + "step": 13232 + }, + { + "epoch": 0.94, + "grad_norm": 1.7875690978134984, + "learning_rate": 9.712734067073959e-08, + "loss": 0.5498, + "step": 13233 + }, + { + "epoch": 0.94, + "grad_norm": 1.522951121094993, + "learning_rate": 9.69020646832286e-08, + "loss": 0.4695, + "step": 13234 + }, + { + "epoch": 0.94, + "grad_norm": 1.5479729106162157, + "learning_rate": 9.667704769396546e-08, + "loss": 0.3966, + "step": 13235 + }, + { + "epoch": 0.94, + "grad_norm": 2.1753118609488222, + "learning_rate": 9.6452289714834e-08, + "loss": 0.4988, + "step": 13236 + }, + { + "epoch": 0.94, + "grad_norm": 1.7439013878297207, + "learning_rate": 9.622779075770917e-08, + "loss": 0.5231, + "step": 13237 + }, + { + "epoch": 0.94, + "grad_norm": 1.9007875054003631, + "learning_rate": 9.600355083444813e-08, + "loss": 0.503, + "step": 13238 + }, + { + "epoch": 0.94, + "grad_norm": 2.231269560907182, + "learning_rate": 9.577956995689697e-08, + "loss": 0.52, + "step": 13239 + }, + { + "epoch": 0.94, + "grad_norm": 1.9305540414562146, + "learning_rate": 9.555584813688623e-08, + "loss": 0.4918, + "step": 13240 + }, + { + "epoch": 0.94, + "grad_norm": 0.6860089648071429, + "learning_rate": 9.533238538623424e-08, + "loss": 0.4148, + "step": 13241 + }, + { + "epoch": 0.94, + "grad_norm": 2.0493181056811838, + "learning_rate": 9.510918171674544e-08, + "loss": 0.5698, + "step": 13242 + }, + { + "epoch": 0.94, + "grad_norm": 1.8118398591915739, + "learning_rate": 9.488623714020872e-08, + "loss": 0.5558, + "step": 13243 + }, + { + "epoch": 0.94, + "grad_norm": 2.0265985300652667, + "learning_rate": 9.466355166840302e-08, + "loss": 0.4574, + "step": 13244 + }, + { + "epoch": 0.94, + "grad_norm": 1.9180999210340808, + "learning_rate": 9.444112531308892e-08, + "loss": 0.4909, + "step": 13245 + }, + { + "epoch": 0.94, + "grad_norm": 1.9008720103183583, + "learning_rate": 9.421895808601811e-08, + "loss": 0.5194, + "step": 13246 + }, + { + "epoch": 0.94, + "grad_norm": 2.126353036520229, + "learning_rate": 9.399704999892345e-08, + "loss": 0.572, + "step": 13247 + }, + { + "epoch": 0.94, + "grad_norm": 2.3202856101685114, + "learning_rate": 9.377540106352945e-08, + "loss": 0.5352, + "step": 13248 + }, + { + "epoch": 0.94, + "grad_norm": 1.6303168472489464, + "learning_rate": 9.35540112915434e-08, + "loss": 0.4783, + "step": 13249 + }, + { + "epoch": 0.94, + "grad_norm": 1.8721498355682304, + "learning_rate": 9.333288069465928e-08, + "loss": 0.5247, + "step": 13250 + }, + { + "epoch": 0.94, + "grad_norm": 1.591092591852355, + "learning_rate": 9.311200928455832e-08, + "loss": 0.5354, + "step": 13251 + }, + { + "epoch": 0.94, + "grad_norm": 2.155419284456468, + "learning_rate": 9.289139707290839e-08, + "loss": 0.5709, + "step": 13252 + }, + { + "epoch": 0.94, + "grad_norm": 1.7047973816410467, + "learning_rate": 9.26710440713624e-08, + "loss": 0.4983, + "step": 13253 + }, + { + "epoch": 0.94, + "grad_norm": 1.5643003050366895, + "learning_rate": 9.245095029155993e-08, + "loss": 0.4884, + "step": 13254 + }, + { + "epoch": 0.94, + "grad_norm": 1.8580252611109072, + "learning_rate": 9.223111574512721e-08, + "loss": 0.5542, + "step": 13255 + }, + { + "epoch": 0.94, + "grad_norm": 1.7797127195306517, + "learning_rate": 9.201154044367667e-08, + "loss": 0.5802, + "step": 13256 + }, + { + "epoch": 0.94, + "grad_norm": 0.7324805747955572, + "learning_rate": 9.17922243988073e-08, + "loss": 0.4248, + "step": 13257 + }, + { + "epoch": 0.94, + "grad_norm": 1.6851642828882265, + "learning_rate": 9.157316762210378e-08, + "loss": 0.5124, + "step": 13258 + }, + { + "epoch": 0.94, + "grad_norm": 1.6566246197704426, + "learning_rate": 9.135437012513737e-08, + "loss": 0.4761, + "step": 13259 + }, + { + "epoch": 0.94, + "grad_norm": 1.514201655924791, + "learning_rate": 9.113583191946662e-08, + "loss": 0.5236, + "step": 13260 + }, + { + "epoch": 0.94, + "grad_norm": 1.634956392473887, + "learning_rate": 9.091755301663452e-08, + "loss": 0.4814, + "step": 13261 + }, + { + "epoch": 0.94, + "grad_norm": 1.8323985454617717, + "learning_rate": 9.069953342817129e-08, + "loss": 0.5451, + "step": 13262 + }, + { + "epoch": 0.94, + "grad_norm": 1.9242919702295125, + "learning_rate": 9.048177316559326e-08, + "loss": 0.5202, + "step": 13263 + }, + { + "epoch": 0.94, + "grad_norm": 1.8898142409798948, + "learning_rate": 9.026427224040402e-08, + "loss": 0.5051, + "step": 13264 + }, + { + "epoch": 0.94, + "grad_norm": 1.9425505713913152, + "learning_rate": 9.004703066409326e-08, + "loss": 0.5424, + "step": 13265 + }, + { + "epoch": 0.94, + "grad_norm": 1.8531722529918413, + "learning_rate": 8.983004844813404e-08, + "loss": 0.6175, + "step": 13266 + }, + { + "epoch": 0.94, + "grad_norm": 1.5267709103770997, + "learning_rate": 8.96133256039905e-08, + "loss": 0.516, + "step": 13267 + }, + { + "epoch": 0.94, + "grad_norm": 1.5733922955172732, + "learning_rate": 8.939686214310961e-08, + "loss": 0.54, + "step": 13268 + }, + { + "epoch": 0.94, + "grad_norm": 1.5930758632501985, + "learning_rate": 8.91806580769261e-08, + "loss": 0.5711, + "step": 13269 + }, + { + "epoch": 0.94, + "grad_norm": 1.660508156651888, + "learning_rate": 8.896471341685975e-08, + "loss": 0.5352, + "step": 13270 + }, + { + "epoch": 0.94, + "grad_norm": 2.149956409546665, + "learning_rate": 8.874902817431863e-08, + "loss": 0.5016, + "step": 13271 + }, + { + "epoch": 0.94, + "grad_norm": 1.7553119807356274, + "learning_rate": 8.85336023606953e-08, + "loss": 0.5235, + "step": 13272 + }, + { + "epoch": 0.94, + "grad_norm": 0.7267233401169656, + "learning_rate": 8.8318435987369e-08, + "loss": 0.443, + "step": 13273 + }, + { + "epoch": 0.94, + "grad_norm": 0.61640499114548, + "learning_rate": 8.810352906570673e-08, + "loss": 0.4089, + "step": 13274 + }, + { + "epoch": 0.94, + "grad_norm": 1.7988610144250905, + "learning_rate": 8.788888160705888e-08, + "loss": 0.4907, + "step": 13275 + }, + { + "epoch": 0.94, + "grad_norm": 1.589982635070214, + "learning_rate": 8.767449362276526e-08, + "loss": 0.5411, + "step": 13276 + }, + { + "epoch": 0.94, + "grad_norm": 1.7601502470007693, + "learning_rate": 8.746036512415013e-08, + "loss": 0.5573, + "step": 13277 + }, + { + "epoch": 0.94, + "grad_norm": 2.3563650243590137, + "learning_rate": 8.724649612252445e-08, + "loss": 0.5222, + "step": 13278 + }, + { + "epoch": 0.94, + "grad_norm": 1.8995182289689183, + "learning_rate": 8.70328866291864e-08, + "loss": 0.563, + "step": 13279 + }, + { + "epoch": 0.94, + "grad_norm": 1.8196757895037838, + "learning_rate": 8.681953665541754e-08, + "loss": 0.5509, + "step": 13280 + }, + { + "epoch": 0.94, + "grad_norm": 1.8578971754819096, + "learning_rate": 8.660644621248938e-08, + "loss": 0.5021, + "step": 13281 + }, + { + "epoch": 0.94, + "grad_norm": 2.4547226703414844, + "learning_rate": 8.639361531165735e-08, + "loss": 0.4671, + "step": 13282 + }, + { + "epoch": 0.94, + "grad_norm": 3.8213172429616193, + "learning_rate": 8.618104396416416e-08, + "loss": 0.4731, + "step": 13283 + }, + { + "epoch": 0.94, + "grad_norm": 1.7966337444902523, + "learning_rate": 8.596873218123858e-08, + "loss": 0.5094, + "step": 13284 + }, + { + "epoch": 0.94, + "grad_norm": 1.628631105505193, + "learning_rate": 8.575667997409553e-08, + "loss": 0.5169, + "step": 13285 + }, + { + "epoch": 0.94, + "grad_norm": 1.8026571227831583, + "learning_rate": 8.554488735393662e-08, + "loss": 0.5227, + "step": 13286 + }, + { + "epoch": 0.94, + "grad_norm": 1.7220391125107832, + "learning_rate": 8.5333354331949e-08, + "loss": 0.4843, + "step": 13287 + }, + { + "epoch": 0.94, + "grad_norm": 0.7306319138885214, + "learning_rate": 8.512208091930706e-08, + "loss": 0.3905, + "step": 13288 + }, + { + "epoch": 0.94, + "grad_norm": 1.5796255258153578, + "learning_rate": 8.491106712716968e-08, + "loss": 0.5234, + "step": 13289 + }, + { + "epoch": 0.94, + "grad_norm": 1.511806742799927, + "learning_rate": 8.470031296668569e-08, + "loss": 0.4512, + "step": 13290 + }, + { + "epoch": 0.94, + "grad_norm": 2.1707546612581257, + "learning_rate": 8.448981844898563e-08, + "loss": 0.4832, + "step": 13291 + }, + { + "epoch": 0.94, + "grad_norm": 2.17274749425256, + "learning_rate": 8.42795835851895e-08, + "loss": 0.5702, + "step": 13292 + }, + { + "epoch": 0.94, + "grad_norm": 1.644371463017494, + "learning_rate": 8.406960838640233e-08, + "loss": 0.5507, + "step": 13293 + }, + { + "epoch": 0.94, + "grad_norm": 1.6578637416713278, + "learning_rate": 8.385989286371632e-08, + "loss": 0.5613, + "step": 13294 + }, + { + "epoch": 0.94, + "grad_norm": 1.8034548225494922, + "learning_rate": 8.365043702820875e-08, + "loss": 0.6174, + "step": 13295 + }, + { + "epoch": 0.94, + "grad_norm": 1.5929181072933678, + "learning_rate": 8.344124089094352e-08, + "loss": 0.5135, + "step": 13296 + }, + { + "epoch": 0.94, + "grad_norm": 1.5657749806087617, + "learning_rate": 8.323230446297237e-08, + "loss": 0.5416, + "step": 13297 + }, + { + "epoch": 0.94, + "grad_norm": 3.0966734878638795, + "learning_rate": 8.302362775533091e-08, + "loss": 0.5028, + "step": 13298 + }, + { + "epoch": 0.94, + "grad_norm": 1.527821019626074, + "learning_rate": 8.281521077904198e-08, + "loss": 0.4321, + "step": 13299 + }, + { + "epoch": 0.94, + "grad_norm": 1.721230922689349, + "learning_rate": 8.260705354511566e-08, + "loss": 0.5929, + "step": 13300 + }, + { + "epoch": 0.94, + "grad_norm": 1.7585005840800618, + "learning_rate": 8.239915606454707e-08, + "loss": 0.497, + "step": 13301 + }, + { + "epoch": 0.94, + "grad_norm": 1.629287753706686, + "learning_rate": 8.219151834831851e-08, + "loss": 0.5084, + "step": 13302 + }, + { + "epoch": 0.94, + "grad_norm": 1.5541421447740387, + "learning_rate": 8.198414040739732e-08, + "loss": 0.5265, + "step": 13303 + }, + { + "epoch": 0.94, + "grad_norm": 1.6267837962100555, + "learning_rate": 8.177702225273865e-08, + "loss": 0.5118, + "step": 13304 + }, + { + "epoch": 0.94, + "grad_norm": 2.268169319512834, + "learning_rate": 8.157016389528261e-08, + "loss": 0.6038, + "step": 13305 + }, + { + "epoch": 0.94, + "grad_norm": 2.1061410647800214, + "learning_rate": 8.136356534595602e-08, + "loss": 0.5375, + "step": 13306 + }, + { + "epoch": 0.94, + "grad_norm": 1.7152081647130295, + "learning_rate": 8.115722661567348e-08, + "loss": 0.5579, + "step": 13307 + }, + { + "epoch": 0.94, + "grad_norm": 1.8035117071798605, + "learning_rate": 8.095114771533297e-08, + "loss": 0.4863, + "step": 13308 + }, + { + "epoch": 0.94, + "grad_norm": 1.663476190277438, + "learning_rate": 8.074532865582129e-08, + "loss": 0.5144, + "step": 13309 + }, + { + "epoch": 0.94, + "grad_norm": 1.6734809985375327, + "learning_rate": 8.053976944800978e-08, + "loss": 0.5084, + "step": 13310 + }, + { + "epoch": 0.94, + "grad_norm": 1.5999668721413445, + "learning_rate": 8.033447010275696e-08, + "loss": 0.4809, + "step": 13311 + }, + { + "epoch": 0.94, + "grad_norm": 1.5818141862270003, + "learning_rate": 8.01294306309075e-08, + "loss": 0.5079, + "step": 13312 + }, + { + "epoch": 0.94, + "grad_norm": 4.794585032305057, + "learning_rate": 7.992465104329273e-08, + "loss": 0.4826, + "step": 13313 + }, + { + "epoch": 0.94, + "grad_norm": 0.7124379836122415, + "learning_rate": 7.972013135072897e-08, + "loss": 0.4385, + "step": 13314 + }, + { + "epoch": 0.94, + "grad_norm": 1.8364607206223444, + "learning_rate": 7.95158715640193e-08, + "loss": 0.4907, + "step": 13315 + }, + { + "epoch": 0.94, + "grad_norm": 2.1914095169768424, + "learning_rate": 7.93118716939556e-08, + "loss": 0.5612, + "step": 13316 + }, + { + "epoch": 0.95, + "grad_norm": 1.8631439012568103, + "learning_rate": 7.910813175131205e-08, + "loss": 0.517, + "step": 13317 + }, + { + "epoch": 0.95, + "grad_norm": 2.0793786942444163, + "learning_rate": 7.890465174685114e-08, + "loss": 0.5409, + "step": 13318 + }, + { + "epoch": 0.95, + "grad_norm": 1.81298250185738, + "learning_rate": 7.870143169132093e-08, + "loss": 0.5103, + "step": 13319 + }, + { + "epoch": 0.95, + "grad_norm": 1.4850967765852918, + "learning_rate": 7.849847159545731e-08, + "loss": 0.4893, + "step": 13320 + }, + { + "epoch": 0.95, + "grad_norm": 1.5450078992982785, + "learning_rate": 7.829577146998058e-08, + "loss": 0.5476, + "step": 13321 + }, + { + "epoch": 0.95, + "grad_norm": 1.9048665646178986, + "learning_rate": 7.809333132559827e-08, + "loss": 0.5156, + "step": 13322 + }, + { + "epoch": 0.95, + "grad_norm": 1.769447589377848, + "learning_rate": 7.78911511730035e-08, + "loss": 0.4928, + "step": 13323 + }, + { + "epoch": 0.95, + "grad_norm": 1.9334912059502467, + "learning_rate": 7.768923102287717e-08, + "loss": 0.4762, + "step": 13324 + }, + { + "epoch": 0.95, + "grad_norm": 0.7260619027981845, + "learning_rate": 7.748757088588466e-08, + "loss": 0.4014, + "step": 13325 + }, + { + "epoch": 0.95, + "grad_norm": 1.673369725576811, + "learning_rate": 7.728617077267742e-08, + "loss": 0.5468, + "step": 13326 + }, + { + "epoch": 0.95, + "grad_norm": 1.781258213419989, + "learning_rate": 7.708503069389639e-08, + "loss": 0.5403, + "step": 13327 + }, + { + "epoch": 0.95, + "grad_norm": 1.748714564549957, + "learning_rate": 7.68841506601642e-08, + "loss": 0.4827, + "step": 13328 + }, + { + "epoch": 0.95, + "grad_norm": 2.059987221184396, + "learning_rate": 7.668353068209344e-08, + "loss": 0.4804, + "step": 13329 + }, + { + "epoch": 0.95, + "grad_norm": 1.7375412250997837, + "learning_rate": 7.648317077028066e-08, + "loss": 0.5425, + "step": 13330 + }, + { + "epoch": 0.95, + "grad_norm": 1.6834722007441973, + "learning_rate": 7.628307093531018e-08, + "loss": 0.5485, + "step": 13331 + }, + { + "epoch": 0.95, + "grad_norm": 1.700299148201846, + "learning_rate": 7.608323118775241e-08, + "loss": 0.4972, + "step": 13332 + }, + { + "epoch": 0.95, + "grad_norm": 2.278503819402102, + "learning_rate": 7.588365153816224e-08, + "loss": 0.5319, + "step": 13333 + }, + { + "epoch": 0.95, + "grad_norm": 1.677861048642186, + "learning_rate": 7.568433199708236e-08, + "loss": 0.5585, + "step": 13334 + }, + { + "epoch": 0.95, + "grad_norm": 1.6517932075943318, + "learning_rate": 7.548527257504267e-08, + "loss": 0.5152, + "step": 13335 + }, + { + "epoch": 0.95, + "grad_norm": 1.7805255814493153, + "learning_rate": 7.528647328255701e-08, + "loss": 0.5705, + "step": 13336 + }, + { + "epoch": 0.95, + "grad_norm": 1.7030129469383202, + "learning_rate": 7.508793413012749e-08, + "loss": 0.5036, + "step": 13337 + }, + { + "epoch": 0.95, + "grad_norm": 1.7415983330134428, + "learning_rate": 7.48896551282402e-08, + "loss": 0.5223, + "step": 13338 + }, + { + "epoch": 0.95, + "grad_norm": 1.7230438882791808, + "learning_rate": 7.469163628737064e-08, + "loss": 0.5659, + "step": 13339 + }, + { + "epoch": 0.95, + "grad_norm": 1.5236686472499619, + "learning_rate": 7.449387761797766e-08, + "loss": 0.4994, + "step": 13340 + }, + { + "epoch": 0.95, + "grad_norm": 1.6632839415721188, + "learning_rate": 7.429637913050847e-08, + "loss": 0.5259, + "step": 13341 + }, + { + "epoch": 0.95, + "grad_norm": 1.9709579829148869, + "learning_rate": 7.409914083539471e-08, + "loss": 0.578, + "step": 13342 + }, + { + "epoch": 0.95, + "grad_norm": 1.6354446564805123, + "learning_rate": 7.390216274305584e-08, + "loss": 0.5655, + "step": 13343 + }, + { + "epoch": 0.95, + "grad_norm": 1.879110780828671, + "learning_rate": 7.370544486389686e-08, + "loss": 0.5933, + "step": 13344 + }, + { + "epoch": 0.95, + "grad_norm": 1.8048491971300358, + "learning_rate": 7.350898720830779e-08, + "loss": 0.5085, + "step": 13345 + }, + { + "epoch": 0.95, + "grad_norm": 1.492622374601976, + "learning_rate": 7.331278978666812e-08, + "loss": 0.4563, + "step": 13346 + }, + { + "epoch": 0.95, + "grad_norm": 1.6756374909703124, + "learning_rate": 7.311685260934065e-08, + "loss": 0.4758, + "step": 13347 + }, + { + "epoch": 0.95, + "grad_norm": 1.6452728306211448, + "learning_rate": 7.2921175686676e-08, + "loss": 0.535, + "step": 13348 + }, + { + "epoch": 0.95, + "grad_norm": 0.7059315716079407, + "learning_rate": 7.272575902900925e-08, + "loss": 0.4298, + "step": 13349 + }, + { + "epoch": 0.95, + "grad_norm": 1.6180626788816594, + "learning_rate": 7.253060264666434e-08, + "loss": 0.4945, + "step": 13350 + }, + { + "epoch": 0.95, + "grad_norm": 0.6678214505068178, + "learning_rate": 7.233570654994915e-08, + "loss": 0.4226, + "step": 13351 + }, + { + "epoch": 0.95, + "grad_norm": 1.7883200628945006, + "learning_rate": 7.214107074915932e-08, + "loss": 0.5283, + "step": 13352 + }, + { + "epoch": 0.95, + "grad_norm": 1.6163726106285747, + "learning_rate": 7.194669525457609e-08, + "loss": 0.4689, + "step": 13353 + }, + { + "epoch": 0.95, + "grad_norm": 1.6622326364157907, + "learning_rate": 7.175258007646679e-08, + "loss": 0.4785, + "step": 13354 + }, + { + "epoch": 0.95, + "grad_norm": 0.6653429803520305, + "learning_rate": 7.155872522508545e-08, + "loss": 0.4165, + "step": 13355 + }, + { + "epoch": 0.95, + "grad_norm": 1.914487779258154, + "learning_rate": 7.136513071067164e-08, + "loss": 0.5314, + "step": 13356 + }, + { + "epoch": 0.95, + "grad_norm": 1.5303492511056946, + "learning_rate": 7.117179654345329e-08, + "loss": 0.4964, + "step": 13357 + }, + { + "epoch": 0.95, + "grad_norm": 3.120055217182968, + "learning_rate": 7.09787227336406e-08, + "loss": 0.5872, + "step": 13358 + }, + { + "epoch": 0.95, + "grad_norm": 1.8761769429594013, + "learning_rate": 7.078590929143426e-08, + "loss": 0.5529, + "step": 13359 + }, + { + "epoch": 0.95, + "grad_norm": 1.9294302636094869, + "learning_rate": 7.059335622701835e-08, + "loss": 0.4854, + "step": 13360 + }, + { + "epoch": 0.95, + "grad_norm": 1.6161278765833476, + "learning_rate": 7.040106355056476e-08, + "loss": 0.4661, + "step": 13361 + }, + { + "epoch": 0.95, + "grad_norm": 1.67341798805015, + "learning_rate": 7.02090312722309e-08, + "loss": 0.5799, + "step": 13362 + }, + { + "epoch": 0.95, + "grad_norm": 1.875048625533406, + "learning_rate": 7.001725940215975e-08, + "loss": 0.6032, + "step": 13363 + }, + { + "epoch": 0.95, + "grad_norm": 2.0831020079408535, + "learning_rate": 6.982574795048214e-08, + "loss": 0.5019, + "step": 13364 + }, + { + "epoch": 0.95, + "grad_norm": 1.553063035689426, + "learning_rate": 6.963449692731439e-08, + "loss": 0.537, + "step": 13365 + }, + { + "epoch": 0.95, + "grad_norm": 1.4975976741123553, + "learning_rate": 6.944350634275898e-08, + "loss": 0.5005, + "step": 13366 + }, + { + "epoch": 0.95, + "grad_norm": 2.3666998139686903, + "learning_rate": 6.925277620690507e-08, + "loss": 0.49, + "step": 13367 + }, + { + "epoch": 0.95, + "grad_norm": 2.5694537282616436, + "learning_rate": 6.906230652982627e-08, + "loss": 0.5114, + "step": 13368 + }, + { + "epoch": 0.95, + "grad_norm": 1.6180963383890599, + "learning_rate": 6.887209732158506e-08, + "loss": 0.4953, + "step": 13369 + }, + { + "epoch": 0.95, + "grad_norm": 2.144715485930929, + "learning_rate": 6.868214859222899e-08, + "loss": 0.5491, + "step": 13370 + }, + { + "epoch": 0.95, + "grad_norm": 1.5294086217624001, + "learning_rate": 6.849246035179057e-08, + "loss": 0.476, + "step": 13371 + }, + { + "epoch": 0.95, + "grad_norm": 1.5904972834732505, + "learning_rate": 6.830303261029126e-08, + "loss": 0.4757, + "step": 13372 + }, + { + "epoch": 0.95, + "grad_norm": 1.8288579947357921, + "learning_rate": 6.811386537773634e-08, + "loss": 0.575, + "step": 13373 + }, + { + "epoch": 0.95, + "grad_norm": 1.795680611119649, + "learning_rate": 6.792495866411896e-08, + "loss": 0.4896, + "step": 13374 + }, + { + "epoch": 0.95, + "grad_norm": 1.660074754243511, + "learning_rate": 6.77363124794167e-08, + "loss": 0.5293, + "step": 13375 + }, + { + "epoch": 0.95, + "grad_norm": 2.368690918676919, + "learning_rate": 6.754792683359601e-08, + "loss": 0.5387, + "step": 13376 + }, + { + "epoch": 0.95, + "grad_norm": 1.8366273086462805, + "learning_rate": 6.735980173660728e-08, + "loss": 0.4936, + "step": 13377 + }, + { + "epoch": 0.95, + "grad_norm": 1.6567820853331534, + "learning_rate": 6.717193719838755e-08, + "loss": 0.483, + "step": 13378 + }, + { + "epoch": 0.95, + "grad_norm": 2.0485381324512937, + "learning_rate": 6.698433322886055e-08, + "loss": 0.6365, + "step": 13379 + }, + { + "epoch": 0.95, + "grad_norm": 1.6830510169028807, + "learning_rate": 6.67969898379367e-08, + "loss": 0.4756, + "step": 13380 + }, + { + "epoch": 0.95, + "grad_norm": 0.6656386080466188, + "learning_rate": 6.660990703551195e-08, + "loss": 0.4217, + "step": 13381 + }, + { + "epoch": 0.95, + "grad_norm": 1.7928496475202342, + "learning_rate": 6.642308483146842e-08, + "loss": 0.5253, + "step": 13382 + }, + { + "epoch": 0.95, + "grad_norm": 1.7833409016331307, + "learning_rate": 6.623652323567431e-08, + "loss": 0.4773, + "step": 13383 + }, + { + "epoch": 0.95, + "grad_norm": 1.754424857803028, + "learning_rate": 6.605022225798563e-08, + "loss": 0.5039, + "step": 13384 + }, + { + "epoch": 0.95, + "grad_norm": 1.5797217711390255, + "learning_rate": 6.586418190824228e-08, + "loss": 0.5636, + "step": 13385 + }, + { + "epoch": 0.95, + "grad_norm": 1.752777120473431, + "learning_rate": 6.567840219627198e-08, + "loss": 0.4835, + "step": 13386 + }, + { + "epoch": 0.95, + "grad_norm": 1.6743565230509634, + "learning_rate": 6.549288313188851e-08, + "loss": 0.5561, + "step": 13387 + }, + { + "epoch": 0.95, + "grad_norm": 1.654180398810232, + "learning_rate": 6.530762472489128e-08, + "loss": 0.5148, + "step": 13388 + }, + { + "epoch": 0.95, + "grad_norm": 1.6400643340806267, + "learning_rate": 6.512262698506632e-08, + "loss": 0.5367, + "step": 13389 + }, + { + "epoch": 0.95, + "grad_norm": 1.5844814019226747, + "learning_rate": 6.49378899221853e-08, + "loss": 0.4425, + "step": 13390 + }, + { + "epoch": 0.95, + "grad_norm": 1.6315284585406573, + "learning_rate": 6.475341354600762e-08, + "loss": 0.4719, + "step": 13391 + }, + { + "epoch": 0.95, + "grad_norm": 2.3865546067146606, + "learning_rate": 6.456919786627824e-08, + "loss": 0.5154, + "step": 13392 + }, + { + "epoch": 0.95, + "grad_norm": 1.8408464314796802, + "learning_rate": 6.438524289272662e-08, + "loss": 0.4753, + "step": 13393 + }, + { + "epoch": 0.95, + "grad_norm": 1.6925066164838034, + "learning_rate": 6.420154863507056e-08, + "loss": 0.5268, + "step": 13394 + }, + { + "epoch": 0.95, + "grad_norm": 2.004397655373775, + "learning_rate": 6.401811510301337e-08, + "loss": 0.5238, + "step": 13395 + }, + { + "epoch": 0.95, + "grad_norm": 1.7962022178319446, + "learning_rate": 6.38349423062451e-08, + "loss": 0.6012, + "step": 13396 + }, + { + "epoch": 0.95, + "grad_norm": 2.0907976447317504, + "learning_rate": 6.365203025444133e-08, + "loss": 0.5654, + "step": 13397 + }, + { + "epoch": 0.95, + "grad_norm": 1.7298887900558526, + "learning_rate": 6.346937895726324e-08, + "loss": 0.5206, + "step": 13398 + }, + { + "epoch": 0.95, + "grad_norm": 1.648360626517202, + "learning_rate": 6.328698842436032e-08, + "loss": 0.5509, + "step": 13399 + }, + { + "epoch": 0.95, + "grad_norm": 2.1081303306850607, + "learning_rate": 6.310485866536653e-08, + "loss": 0.576, + "step": 13400 + }, + { + "epoch": 0.95, + "grad_norm": 1.595806411843447, + "learning_rate": 6.292298968990251e-08, + "loss": 0.5213, + "step": 13401 + }, + { + "epoch": 0.95, + "grad_norm": 1.5675183188381925, + "learning_rate": 6.274138150757503e-08, + "loss": 0.5337, + "step": 13402 + }, + { + "epoch": 0.95, + "grad_norm": 1.9137576573693766, + "learning_rate": 6.256003412797807e-08, + "loss": 0.462, + "step": 13403 + }, + { + "epoch": 0.95, + "grad_norm": 0.6812983168941994, + "learning_rate": 6.237894756069007e-08, + "loss": 0.391, + "step": 13404 + }, + { + "epoch": 0.95, + "grad_norm": 1.6429217492421295, + "learning_rate": 6.219812181527673e-08, + "loss": 0.5293, + "step": 13405 + }, + { + "epoch": 0.95, + "grad_norm": 1.5710539652249362, + "learning_rate": 6.201755690129096e-08, + "loss": 0.4542, + "step": 13406 + }, + { + "epoch": 0.95, + "grad_norm": 1.8411509097008711, + "learning_rate": 6.183725282826958e-08, + "loss": 0.5353, + "step": 13407 + }, + { + "epoch": 0.95, + "grad_norm": 1.596627721615267, + "learning_rate": 6.165720960573662e-08, + "loss": 0.4664, + "step": 13408 + }, + { + "epoch": 0.95, + "grad_norm": 1.6771937366903975, + "learning_rate": 6.147742724320394e-08, + "loss": 0.5177, + "step": 13409 + }, + { + "epoch": 0.95, + "grad_norm": 2.094216082932814, + "learning_rate": 6.129790575016725e-08, + "loss": 0.5265, + "step": 13410 + }, + { + "epoch": 0.95, + "grad_norm": 1.6540755149447988, + "learning_rate": 6.111864513611065e-08, + "loss": 0.5314, + "step": 13411 + }, + { + "epoch": 0.95, + "grad_norm": 1.7861375265426342, + "learning_rate": 6.093964541050157e-08, + "loss": 0.5685, + "step": 13412 + }, + { + "epoch": 0.95, + "grad_norm": 1.5748671680450401, + "learning_rate": 6.076090658279632e-08, + "loss": 0.5102, + "step": 13413 + }, + { + "epoch": 0.95, + "grad_norm": 2.556161371158732, + "learning_rate": 6.05824286624368e-08, + "loss": 0.5259, + "step": 13414 + }, + { + "epoch": 0.95, + "grad_norm": 1.8344635685275616, + "learning_rate": 6.040421165884991e-08, + "loss": 0.5275, + "step": 13415 + }, + { + "epoch": 0.95, + "grad_norm": 1.5229663424963504, + "learning_rate": 6.02262555814509e-08, + "loss": 0.4712, + "step": 13416 + }, + { + "epoch": 0.95, + "grad_norm": 1.4376129889489422, + "learning_rate": 6.004856043963836e-08, + "loss": 0.4563, + "step": 13417 + }, + { + "epoch": 0.95, + "grad_norm": 1.7876184649939328, + "learning_rate": 5.987112624280034e-08, + "loss": 0.4921, + "step": 13418 + }, + { + "epoch": 0.95, + "grad_norm": 1.6340068293474668, + "learning_rate": 5.969395300030878e-08, + "loss": 0.441, + "step": 13419 + }, + { + "epoch": 0.95, + "grad_norm": 1.7506523443137987, + "learning_rate": 5.9517040721522314e-08, + "loss": 0.4997, + "step": 13420 + }, + { + "epoch": 0.95, + "grad_norm": 1.8746323175760728, + "learning_rate": 5.934038941578679e-08, + "loss": 0.5562, + "step": 13421 + }, + { + "epoch": 0.95, + "grad_norm": 1.9351534805137758, + "learning_rate": 5.916399909243309e-08, + "loss": 0.5324, + "step": 13422 + }, + { + "epoch": 0.95, + "grad_norm": 2.0505259913374143, + "learning_rate": 5.898786976077875e-08, + "loss": 0.5291, + "step": 13423 + }, + { + "epoch": 0.95, + "grad_norm": 2.0178405573273235, + "learning_rate": 5.881200143012744e-08, + "loss": 0.5263, + "step": 13424 + }, + { + "epoch": 0.95, + "grad_norm": 2.009706464231653, + "learning_rate": 5.863639410976951e-08, + "loss": 0.5819, + "step": 13425 + }, + { + "epoch": 0.95, + "grad_norm": 3.319199156660441, + "learning_rate": 5.846104780898032e-08, + "loss": 0.5691, + "step": 13426 + }, + { + "epoch": 0.95, + "grad_norm": 1.8535511951251624, + "learning_rate": 5.8285962537023565e-08, + "loss": 0.5028, + "step": 13427 + }, + { + "epoch": 0.95, + "grad_norm": 1.9534541453852678, + "learning_rate": 5.81111383031463e-08, + "loss": 0.6048, + "step": 13428 + }, + { + "epoch": 0.95, + "grad_norm": 1.5827648951870454, + "learning_rate": 5.793657511658502e-08, + "loss": 0.4509, + "step": 13429 + }, + { + "epoch": 0.95, + "grad_norm": 1.5980687985662734, + "learning_rate": 5.776227298655901e-08, + "loss": 0.4672, + "step": 13430 + }, + { + "epoch": 0.95, + "grad_norm": 12.582911141647005, + "learning_rate": 5.7588231922277025e-08, + "loss": 0.4788, + "step": 13431 + }, + { + "epoch": 0.95, + "grad_norm": 2.0168425799177743, + "learning_rate": 5.7414451932931156e-08, + "loss": 0.5328, + "step": 13432 + }, + { + "epoch": 0.95, + "grad_norm": 1.7272323532949372, + "learning_rate": 5.724093302770184e-08, + "loss": 0.4978, + "step": 13433 + }, + { + "epoch": 0.95, + "grad_norm": 1.6801779973860833, + "learning_rate": 5.706767521575563e-08, + "loss": 0.4898, + "step": 13434 + }, + { + "epoch": 0.95, + "grad_norm": 4.354322021760506, + "learning_rate": 5.689467850624242e-08, + "loss": 0.5061, + "step": 13435 + }, + { + "epoch": 0.95, + "grad_norm": 1.6484029108375429, + "learning_rate": 5.672194290830268e-08, + "loss": 0.5643, + "step": 13436 + }, + { + "epoch": 0.95, + "grad_norm": 0.6449602396775459, + "learning_rate": 5.654946843105968e-08, + "loss": 0.4276, + "step": 13437 + }, + { + "epoch": 0.95, + "grad_norm": 1.8335991159532974, + "learning_rate": 5.637725508362446e-08, + "loss": 0.5048, + "step": 13438 + }, + { + "epoch": 0.95, + "grad_norm": 0.6823278509396153, + "learning_rate": 5.620530287509418e-08, + "loss": 0.4437, + "step": 13439 + }, + { + "epoch": 0.95, + "grad_norm": 1.5729144392492262, + "learning_rate": 5.603361181455158e-08, + "loss": 0.5043, + "step": 13440 + }, + { + "epoch": 0.95, + "grad_norm": 0.7092050047082739, + "learning_rate": 5.586218191106607e-08, + "loss": 0.4196, + "step": 13441 + }, + { + "epoch": 0.95, + "grad_norm": 1.7931022657314613, + "learning_rate": 5.5691013173693185e-08, + "loss": 0.4904, + "step": 13442 + }, + { + "epoch": 0.95, + "grad_norm": 1.6665382183279451, + "learning_rate": 5.552010561147458e-08, + "loss": 0.4718, + "step": 13443 + }, + { + "epoch": 0.95, + "grad_norm": 0.6819431982322116, + "learning_rate": 5.5349459233438044e-08, + "loss": 0.4318, + "step": 13444 + }, + { + "epoch": 0.95, + "grad_norm": 1.7922030320275668, + "learning_rate": 5.517907404859802e-08, + "loss": 0.4759, + "step": 13445 + }, + { + "epoch": 0.95, + "grad_norm": 2.0445390749198236, + "learning_rate": 5.500895006595452e-08, + "loss": 0.5378, + "step": 13446 + }, + { + "epoch": 0.95, + "grad_norm": 1.513944227125897, + "learning_rate": 5.483908729449372e-08, + "loss": 0.4641, + "step": 13447 + }, + { + "epoch": 0.95, + "grad_norm": 1.7558670197483133, + "learning_rate": 5.466948574318953e-08, + "loss": 0.4362, + "step": 13448 + }, + { + "epoch": 0.95, + "grad_norm": 1.447877989305085, + "learning_rate": 5.4500145420999795e-08, + "loss": 0.4366, + "step": 13449 + }, + { + "epoch": 0.95, + "grad_norm": 1.592575130963031, + "learning_rate": 5.433106633687013e-08, + "loss": 0.5419, + "step": 13450 + }, + { + "epoch": 0.95, + "grad_norm": 1.6628726277697317, + "learning_rate": 5.416224849973173e-08, + "loss": 0.5037, + "step": 13451 + }, + { + "epoch": 0.95, + "grad_norm": 1.6641599200784583, + "learning_rate": 5.3993691918502455e-08, + "loss": 0.4721, + "step": 13452 + }, + { + "epoch": 0.95, + "grad_norm": 2.7159631757985503, + "learning_rate": 5.382539660208519e-08, + "loss": 0.5327, + "step": 13453 + }, + { + "epoch": 0.95, + "grad_norm": 0.7041378142271149, + "learning_rate": 5.36573625593706e-08, + "loss": 0.4223, + "step": 13454 + }, + { + "epoch": 0.95, + "grad_norm": 1.9161707904837102, + "learning_rate": 5.348958979923491e-08, + "loss": 0.5372, + "step": 13455 + }, + { + "epoch": 0.95, + "grad_norm": 1.765784321385749, + "learning_rate": 5.332207833053937e-08, + "loss": 0.534, + "step": 13456 + }, + { + "epoch": 0.95, + "grad_norm": 2.5480151394848547, + "learning_rate": 5.315482816213413e-08, + "loss": 0.5627, + "step": 13457 + }, + { + "epoch": 0.96, + "grad_norm": 1.8943917553211822, + "learning_rate": 5.2987839302852676e-08, + "loss": 0.4621, + "step": 13458 + }, + { + "epoch": 0.96, + "grad_norm": 2.3490621921712256, + "learning_rate": 5.282111176151683e-08, + "loss": 0.4683, + "step": 13459 + }, + { + "epoch": 0.96, + "grad_norm": 1.8083271366746292, + "learning_rate": 5.265464554693234e-08, + "loss": 0.5542, + "step": 13460 + }, + { + "epoch": 0.96, + "grad_norm": 1.811398994035874, + "learning_rate": 5.2488440667893825e-08, + "loss": 0.5767, + "step": 13461 + }, + { + "epoch": 0.96, + "grad_norm": 1.8280836424957276, + "learning_rate": 5.2322497133180386e-08, + "loss": 0.5106, + "step": 13462 + }, + { + "epoch": 0.96, + "grad_norm": 1.5580861217613675, + "learning_rate": 5.2156814951557225e-08, + "loss": 0.4954, + "step": 13463 + }, + { + "epoch": 0.96, + "grad_norm": 1.7099717565933956, + "learning_rate": 5.199139413177734e-08, + "loss": 0.5573, + "step": 13464 + }, + { + "epoch": 0.96, + "grad_norm": 2.075248734992447, + "learning_rate": 5.182623468257819e-08, + "loss": 0.4672, + "step": 13465 + }, + { + "epoch": 0.96, + "grad_norm": 1.6280732456691183, + "learning_rate": 5.166133661268335e-08, + "loss": 0.5696, + "step": 13466 + }, + { + "epoch": 0.96, + "grad_norm": 0.7361206626530917, + "learning_rate": 5.1496699930804196e-08, + "loss": 0.4428, + "step": 13467 + }, + { + "epoch": 0.96, + "grad_norm": 1.7535188431145043, + "learning_rate": 5.1332324645637646e-08, + "loss": 0.5916, + "step": 13468 + }, + { + "epoch": 0.96, + "grad_norm": 1.677611782393833, + "learning_rate": 5.1168210765865644e-08, + "loss": 0.4919, + "step": 13469 + }, + { + "epoch": 0.96, + "grad_norm": 1.5847152796425827, + "learning_rate": 5.1004358300157374e-08, + "loss": 0.5073, + "step": 13470 + }, + { + "epoch": 0.96, + "grad_norm": 1.7953184313424153, + "learning_rate": 5.084076725716924e-08, + "loss": 0.4391, + "step": 13471 + }, + { + "epoch": 0.96, + "grad_norm": 1.5746857107565237, + "learning_rate": 5.0677437645541e-08, + "loss": 0.5457, + "step": 13472 + }, + { + "epoch": 0.96, + "grad_norm": 1.7436937206615564, + "learning_rate": 5.0514369473901315e-08, + "loss": 0.4695, + "step": 13473 + }, + { + "epoch": 0.96, + "grad_norm": 1.627958593764295, + "learning_rate": 5.035156275086439e-08, + "loss": 0.4771, + "step": 13474 + }, + { + "epoch": 0.96, + "grad_norm": 1.7675633944848665, + "learning_rate": 5.0189017485028915e-08, + "loss": 0.458, + "step": 13475 + }, + { + "epoch": 0.96, + "grad_norm": 1.5939533651406348, + "learning_rate": 5.0026733684982455e-08, + "loss": 0.5386, + "step": 13476 + }, + { + "epoch": 0.96, + "grad_norm": 1.6893117489822334, + "learning_rate": 4.986471135929538e-08, + "loss": 0.5065, + "step": 13477 + }, + { + "epoch": 0.96, + "grad_norm": 0.6784606065507232, + "learning_rate": 4.970295051652918e-08, + "loss": 0.4282, + "step": 13478 + }, + { + "epoch": 0.96, + "grad_norm": 0.7473703701368204, + "learning_rate": 4.954145116522591e-08, + "loss": 0.42, + "step": 13479 + }, + { + "epoch": 0.96, + "grad_norm": 1.887855512881831, + "learning_rate": 4.9380213313918736e-08, + "loss": 0.5222, + "step": 13480 + }, + { + "epoch": 0.96, + "grad_norm": 1.986767390150602, + "learning_rate": 4.921923697112252e-08, + "loss": 0.5383, + "step": 13481 + }, + { + "epoch": 0.96, + "grad_norm": 2.1592848554386364, + "learning_rate": 4.905852214534268e-08, + "loss": 0.5288, + "step": 13482 + }, + { + "epoch": 0.96, + "grad_norm": 1.53979915954014, + "learning_rate": 4.889806884506687e-08, + "loss": 0.4108, + "step": 13483 + }, + { + "epoch": 0.96, + "grad_norm": 1.8262072779890686, + "learning_rate": 4.87378770787722e-08, + "loss": 0.554, + "step": 13484 + }, + { + "epoch": 0.96, + "grad_norm": 2.07797290670762, + "learning_rate": 4.857794685492023e-08, + "loss": 0.5784, + "step": 13485 + }, + { + "epoch": 0.96, + "grad_norm": 2.0143849669217406, + "learning_rate": 4.84182781819581e-08, + "loss": 0.5607, + "step": 13486 + }, + { + "epoch": 0.96, + "grad_norm": 1.673227367902069, + "learning_rate": 4.8258871068321834e-08, + "loss": 0.4984, + "step": 13487 + }, + { + "epoch": 0.96, + "grad_norm": 1.943486479082497, + "learning_rate": 4.8099725522429695e-08, + "loss": 0.5479, + "step": 13488 + }, + { + "epoch": 0.96, + "grad_norm": 2.8726823283365865, + "learning_rate": 4.794084155268996e-08, + "loss": 0.5234, + "step": 13489 + }, + { + "epoch": 0.96, + "grad_norm": 1.616155205355414, + "learning_rate": 4.7782219167494814e-08, + "loss": 0.4513, + "step": 13490 + }, + { + "epoch": 0.96, + "grad_norm": 1.7493781605692384, + "learning_rate": 4.76238583752231e-08, + "loss": 0.447, + "step": 13491 + }, + { + "epoch": 0.96, + "grad_norm": 1.6206661781984562, + "learning_rate": 4.746575918424034e-08, + "loss": 0.4895, + "step": 13492 + }, + { + "epoch": 0.96, + "grad_norm": 2.9850481665196864, + "learning_rate": 4.730792160289766e-08, + "loss": 0.4919, + "step": 13493 + }, + { + "epoch": 0.96, + "grad_norm": 1.7267101071897075, + "learning_rate": 4.7150345639532245e-08, + "loss": 0.5008, + "step": 13494 + }, + { + "epoch": 0.96, + "grad_norm": 2.0914627693709575, + "learning_rate": 4.699303130246857e-08, + "loss": 0.5206, + "step": 13495 + }, + { + "epoch": 0.96, + "grad_norm": 1.8705177864367013, + "learning_rate": 4.6835978600015544e-08, + "loss": 0.5429, + "step": 13496 + }, + { + "epoch": 0.96, + "grad_norm": 1.6849582389661173, + "learning_rate": 4.66791875404704e-08, + "loss": 0.5294, + "step": 13497 + }, + { + "epoch": 0.96, + "grad_norm": 1.7192704174323055, + "learning_rate": 4.652265813211376e-08, + "loss": 0.5216, + "step": 13498 + }, + { + "epoch": 0.96, + "grad_norm": 1.8281659337905243, + "learning_rate": 4.6366390383215663e-08, + "loss": 0.5214, + "step": 13499 + }, + { + "epoch": 0.96, + "grad_norm": 1.7284313841117678, + "learning_rate": 4.621038430203006e-08, + "loss": 0.5356, + "step": 13500 + }, + { + "epoch": 0.96, + "grad_norm": 1.5790346510915527, + "learning_rate": 4.6054639896797594e-08, + "loss": 0.4722, + "step": 13501 + }, + { + "epoch": 0.96, + "grad_norm": 1.7221845124017083, + "learning_rate": 4.589915717574556e-08, + "loss": 0.4753, + "step": 13502 + }, + { + "epoch": 0.96, + "grad_norm": 2.1566720490876325, + "learning_rate": 4.5743936147086274e-08, + "loss": 0.5799, + "step": 13503 + }, + { + "epoch": 0.96, + "grad_norm": 1.913353239577548, + "learning_rate": 4.558897681901986e-08, + "loss": 0.4962, + "step": 13504 + }, + { + "epoch": 0.96, + "grad_norm": 1.6636466457459353, + "learning_rate": 4.543427919973142e-08, + "loss": 0.5215, + "step": 13505 + }, + { + "epoch": 0.96, + "grad_norm": 1.666692080266022, + "learning_rate": 4.527984329739277e-08, + "loss": 0.5016, + "step": 13506 + }, + { + "epoch": 0.96, + "grad_norm": 1.7494176440550904, + "learning_rate": 4.512566912016181e-08, + "loss": 0.5272, + "step": 13507 + }, + { + "epoch": 0.96, + "grad_norm": 1.710727193725113, + "learning_rate": 4.4971756676182586e-08, + "loss": 0.5358, + "step": 13508 + }, + { + "epoch": 0.96, + "grad_norm": 1.71237987896558, + "learning_rate": 4.4818105973584714e-08, + "loss": 0.4912, + "step": 13509 + }, + { + "epoch": 0.96, + "grad_norm": 1.982262899944758, + "learning_rate": 4.4664717020485024e-08, + "loss": 0.4882, + "step": 13510 + }, + { + "epoch": 0.96, + "grad_norm": 1.5248669573717897, + "learning_rate": 4.451158982498594e-08, + "loss": 0.542, + "step": 13511 + }, + { + "epoch": 0.96, + "grad_norm": 1.5829819300019203, + "learning_rate": 4.435872439517597e-08, + "loss": 0.5098, + "step": 13512 + }, + { + "epoch": 0.96, + "grad_norm": 1.741175371108839, + "learning_rate": 4.4206120739130887e-08, + "loss": 0.515, + "step": 13513 + }, + { + "epoch": 0.96, + "grad_norm": 1.5544050713235698, + "learning_rate": 4.40537788649098e-08, + "loss": 0.4409, + "step": 13514 + }, + { + "epoch": 0.96, + "grad_norm": 1.6391655996414793, + "learning_rate": 4.390169878056238e-08, + "loss": 0.527, + "step": 13515 + }, + { + "epoch": 0.96, + "grad_norm": 0.7420280671547909, + "learning_rate": 4.374988049411999e-08, + "loss": 0.4156, + "step": 13516 + }, + { + "epoch": 0.96, + "grad_norm": 0.6583594213476687, + "learning_rate": 4.359832401360398e-08, + "loss": 0.4109, + "step": 13517 + }, + { + "epoch": 0.96, + "grad_norm": 1.4812647286569571, + "learning_rate": 4.344702934701794e-08, + "loss": 0.4315, + "step": 13518 + }, + { + "epoch": 0.96, + "grad_norm": 1.7634519624310165, + "learning_rate": 4.32959965023555e-08, + "loss": 0.5262, + "step": 13519 + }, + { + "epoch": 0.96, + "grad_norm": 0.670074050413529, + "learning_rate": 4.314522548759414e-08, + "loss": 0.4321, + "step": 13520 + }, + { + "epoch": 0.96, + "grad_norm": 1.5084187762874215, + "learning_rate": 4.299471631069751e-08, + "loss": 0.5465, + "step": 13521 + }, + { + "epoch": 0.96, + "grad_norm": 0.6256008576612377, + "learning_rate": 4.284446897961703e-08, + "loss": 0.4006, + "step": 13522 + }, + { + "epoch": 0.96, + "grad_norm": 1.841570993112766, + "learning_rate": 4.2694483502289105e-08, + "loss": 0.5575, + "step": 13523 + }, + { + "epoch": 0.96, + "grad_norm": 1.7727537525765926, + "learning_rate": 4.254475988663631e-08, + "loss": 0.5703, + "step": 13524 + }, + { + "epoch": 0.96, + "grad_norm": 1.692731591299952, + "learning_rate": 4.239529814056675e-08, + "loss": 0.5342, + "step": 13525 + }, + { + "epoch": 0.96, + "grad_norm": 1.5533537830820223, + "learning_rate": 4.224609827197634e-08, + "loss": 0.4791, + "step": 13526 + }, + { + "epoch": 0.96, + "grad_norm": 1.724535382062079, + "learning_rate": 4.209716028874655e-08, + "loss": 0.5199, + "step": 13527 + }, + { + "epoch": 0.96, + "grad_norm": 2.0221821169698915, + "learning_rate": 4.194848419874387e-08, + "loss": 0.4749, + "step": 13528 + }, + { + "epoch": 0.96, + "grad_norm": 1.7715978565470583, + "learning_rate": 4.1800070009823113e-08, + "loss": 0.5601, + "step": 13529 + }, + { + "epoch": 0.96, + "grad_norm": 1.7254247664983484, + "learning_rate": 4.165191772982302e-08, + "loss": 0.5195, + "step": 13530 + }, + { + "epoch": 0.96, + "grad_norm": 1.586477851604231, + "learning_rate": 4.150402736656955e-08, + "loss": 0.5096, + "step": 13531 + }, + { + "epoch": 0.96, + "grad_norm": 2.231661655399869, + "learning_rate": 4.135639892787535e-08, + "loss": 0.5716, + "step": 13532 + }, + { + "epoch": 0.96, + "grad_norm": 1.7281970538905138, + "learning_rate": 4.120903242153862e-08, + "loss": 0.503, + "step": 13533 + }, + { + "epoch": 0.96, + "grad_norm": 1.6331855019557588, + "learning_rate": 4.106192785534258e-08, + "loss": 0.4809, + "step": 13534 + }, + { + "epoch": 0.96, + "grad_norm": 1.5966585075269586, + "learning_rate": 4.091508523705934e-08, + "loss": 0.5459, + "step": 13535 + }, + { + "epoch": 0.96, + "grad_norm": 0.7406712939253222, + "learning_rate": 4.076850457444492e-08, + "loss": 0.4389, + "step": 13536 + }, + { + "epoch": 0.96, + "grad_norm": 1.943912872534278, + "learning_rate": 4.0622185875242024e-08, + "loss": 0.4836, + "step": 13537 + }, + { + "epoch": 0.96, + "grad_norm": 1.8733216805567499, + "learning_rate": 4.0476129147180573e-08, + "loss": 0.5541, + "step": 13538 + }, + { + "epoch": 0.96, + "grad_norm": 1.5284225675624992, + "learning_rate": 4.0330334397974405e-08, + "loss": 0.4994, + "step": 13539 + }, + { + "epoch": 0.96, + "grad_norm": 1.7926471502953936, + "learning_rate": 4.0184801635325676e-08, + "loss": 0.4934, + "step": 13540 + }, + { + "epoch": 0.96, + "grad_norm": 1.6630940973155597, + "learning_rate": 4.003953086692214e-08, + "loss": 0.4577, + "step": 13541 + }, + { + "epoch": 0.96, + "grad_norm": 1.5876445041765532, + "learning_rate": 3.989452210043709e-08, + "loss": 0.5004, + "step": 13542 + }, + { + "epoch": 0.96, + "grad_norm": 1.901179571493885, + "learning_rate": 3.9749775343531086e-08, + "loss": 0.5237, + "step": 13543 + }, + { + "epoch": 0.96, + "grad_norm": 1.9568972197303869, + "learning_rate": 3.96052906038491e-08, + "loss": 0.6032, + "step": 13544 + }, + { + "epoch": 0.96, + "grad_norm": 1.918394152973589, + "learning_rate": 3.946106788902337e-08, + "loss": 0.5536, + "step": 13545 + }, + { + "epoch": 0.96, + "grad_norm": 2.3690753836684393, + "learning_rate": 3.9317107206673364e-08, + "loss": 0.5226, + "step": 13546 + }, + { + "epoch": 0.96, + "grad_norm": 1.8967361825089084, + "learning_rate": 3.917340856440244e-08, + "loss": 0.4477, + "step": 13547 + }, + { + "epoch": 0.96, + "grad_norm": 1.8808881553752759, + "learning_rate": 3.902997196980174e-08, + "loss": 0.5154, + "step": 13548 + }, + { + "epoch": 0.96, + "grad_norm": 1.816953585530986, + "learning_rate": 3.888679743044799e-08, + "loss": 0.5508, + "step": 13549 + }, + { + "epoch": 0.96, + "grad_norm": 4.7182652723672165, + "learning_rate": 3.874388495390402e-08, + "loss": 0.5254, + "step": 13550 + }, + { + "epoch": 0.96, + "grad_norm": 1.8420155675126932, + "learning_rate": 3.8601234547719354e-08, + "loss": 0.5601, + "step": 13551 + }, + { + "epoch": 0.96, + "grad_norm": 4.589802632176212, + "learning_rate": 3.845884621942853e-08, + "loss": 0.4836, + "step": 13552 + }, + { + "epoch": 0.96, + "grad_norm": 0.7107620243714934, + "learning_rate": 3.831671997655384e-08, + "loss": 0.4326, + "step": 13553 + }, + { + "epoch": 0.96, + "grad_norm": 1.6019172693516748, + "learning_rate": 3.817485582660263e-08, + "loss": 0.4805, + "step": 13554 + }, + { + "epoch": 0.96, + "grad_norm": 1.6741964932657278, + "learning_rate": 3.803325377706779e-08, + "loss": 0.4638, + "step": 13555 + }, + { + "epoch": 0.96, + "grad_norm": 1.6907528556823936, + "learning_rate": 3.789191383543056e-08, + "loss": 0.5005, + "step": 13556 + }, + { + "epoch": 0.96, + "grad_norm": 1.6998320895461911, + "learning_rate": 3.7750836009156074e-08, + "loss": 0.4391, + "step": 13557 + }, + { + "epoch": 0.96, + "grad_norm": 1.9953528276207508, + "learning_rate": 3.7610020305696716e-08, + "loss": 0.5274, + "step": 13558 + }, + { + "epoch": 0.96, + "grad_norm": 2.0121406373910395, + "learning_rate": 3.746946673249152e-08, + "loss": 0.4483, + "step": 13559 + }, + { + "epoch": 0.96, + "grad_norm": 4.155136028532395, + "learning_rate": 3.7329175296964e-08, + "loss": 0.5726, + "step": 13560 + }, + { + "epoch": 0.96, + "grad_norm": 1.6198016887616635, + "learning_rate": 3.718914600652546e-08, + "loss": 0.4939, + "step": 13561 + }, + { + "epoch": 0.96, + "grad_norm": 0.6494925106838684, + "learning_rate": 3.7049378868572186e-08, + "loss": 0.4296, + "step": 13562 + }, + { + "epoch": 0.96, + "grad_norm": 1.6778122309291308, + "learning_rate": 3.690987389048772e-08, + "loss": 0.5128, + "step": 13563 + }, + { + "epoch": 0.96, + "grad_norm": 1.7340801444347587, + "learning_rate": 3.6770631079641185e-08, + "loss": 0.5367, + "step": 13564 + }, + { + "epoch": 0.96, + "grad_norm": 1.6983451067182276, + "learning_rate": 3.663165044338723e-08, + "loss": 0.4874, + "step": 13565 + }, + { + "epoch": 0.96, + "grad_norm": 1.7862432746652919, + "learning_rate": 3.649293198906778e-08, + "loss": 0.5221, + "step": 13566 + }, + { + "epoch": 0.96, + "grad_norm": 1.9162733189678123, + "learning_rate": 3.63544757240103e-08, + "loss": 0.5296, + "step": 13567 + }, + { + "epoch": 0.96, + "grad_norm": 0.6692983118349588, + "learning_rate": 3.621628165552893e-08, + "loss": 0.439, + "step": 13568 + }, + { + "epoch": 0.96, + "grad_norm": 1.7864579706975108, + "learning_rate": 3.607834979092284e-08, + "loss": 0.5658, + "step": 13569 + }, + { + "epoch": 0.96, + "grad_norm": 1.8307996415544872, + "learning_rate": 3.5940680137478425e-08, + "loss": 0.579, + "step": 13570 + }, + { + "epoch": 0.96, + "grad_norm": 1.5774309034230973, + "learning_rate": 3.580327270246764e-08, + "loss": 0.4579, + "step": 13571 + }, + { + "epoch": 0.96, + "grad_norm": 2.1152719534357693, + "learning_rate": 3.566612749314913e-08, + "loss": 0.5989, + "step": 13572 + }, + { + "epoch": 0.96, + "grad_norm": 2.041686939417134, + "learning_rate": 3.552924451676709e-08, + "loss": 0.5388, + "step": 13573 + }, + { + "epoch": 0.96, + "grad_norm": 1.504916420521562, + "learning_rate": 3.5392623780552415e-08, + "loss": 0.535, + "step": 13574 + }, + { + "epoch": 0.96, + "grad_norm": 1.5756269279706359, + "learning_rate": 3.52562652917221e-08, + "loss": 0.5568, + "step": 13575 + }, + { + "epoch": 0.96, + "grad_norm": 1.5653795656307867, + "learning_rate": 3.512016905747817e-08, + "loss": 0.4839, + "step": 13576 + }, + { + "epoch": 0.96, + "grad_norm": 1.6555537849264002, + "learning_rate": 3.4984335085010425e-08, + "loss": 0.5365, + "step": 13577 + }, + { + "epoch": 0.96, + "grad_norm": 2.0066329634111195, + "learning_rate": 3.484876338149368e-08, + "loss": 0.5032, + "step": 13578 + }, + { + "epoch": 0.96, + "grad_norm": 2.3571161874995776, + "learning_rate": 3.4713453954089425e-08, + "loss": 0.4739, + "step": 13579 + }, + { + "epoch": 0.96, + "grad_norm": 2.0814980573350117, + "learning_rate": 3.457840680994584e-08, + "loss": 0.5208, + "step": 13580 + }, + { + "epoch": 0.96, + "grad_norm": 1.9235533573339703, + "learning_rate": 3.444362195619555e-08, + "loss": 0.5544, + "step": 13581 + }, + { + "epoch": 0.96, + "grad_norm": 1.7370474985713849, + "learning_rate": 3.430909939995841e-08, + "loss": 0.4533, + "step": 13582 + }, + { + "epoch": 0.96, + "grad_norm": 1.629337189711682, + "learning_rate": 3.4174839148340964e-08, + "loss": 0.5149, + "step": 13583 + }, + { + "epoch": 0.96, + "grad_norm": 1.8643303570224463, + "learning_rate": 3.404084120843531e-08, + "loss": 0.5306, + "step": 13584 + }, + { + "epoch": 0.96, + "grad_norm": 1.8757311992125263, + "learning_rate": 3.390710558731913e-08, + "loss": 0.5138, + "step": 13585 + }, + { + "epoch": 0.96, + "grad_norm": 2.217709941504913, + "learning_rate": 3.377363229205732e-08, + "loss": 0.4901, + "step": 13586 + }, + { + "epoch": 0.96, + "grad_norm": 0.7214443133277657, + "learning_rate": 3.36404213296998e-08, + "loss": 0.4204, + "step": 13587 + }, + { + "epoch": 0.96, + "grad_norm": 1.561784423002264, + "learning_rate": 3.3507472707283716e-08, + "loss": 0.54, + "step": 13588 + }, + { + "epoch": 0.96, + "grad_norm": 2.3812184634857934, + "learning_rate": 3.337478643183179e-08, + "loss": 0.4746, + "step": 13589 + }, + { + "epoch": 0.96, + "grad_norm": 1.9477716493002033, + "learning_rate": 3.324236251035229e-08, + "loss": 0.5404, + "step": 13590 + }, + { + "epoch": 0.96, + "grad_norm": 1.6596289823523223, + "learning_rate": 3.31102009498413e-08, + "loss": 0.5118, + "step": 13591 + }, + { + "epoch": 0.96, + "grad_norm": 1.8617291662402755, + "learning_rate": 3.297830175727989e-08, + "loss": 0.5048, + "step": 13592 + }, + { + "epoch": 0.96, + "grad_norm": 0.666490444276437, + "learning_rate": 3.284666493963473e-08, + "loss": 0.4211, + "step": 13593 + }, + { + "epoch": 0.96, + "grad_norm": 2.0035348308753553, + "learning_rate": 3.2715290503859685e-08, + "loss": 0.5814, + "step": 13594 + }, + { + "epoch": 0.96, + "grad_norm": 2.6817175860089, + "learning_rate": 3.2584178456894766e-08, + "loss": 0.5119, + "step": 13595 + }, + { + "epoch": 0.96, + "grad_norm": 1.5583792205216291, + "learning_rate": 3.2453328805665544e-08, + "loss": 0.4477, + "step": 13596 + }, + { + "epoch": 0.96, + "grad_norm": 1.6164753386814517, + "learning_rate": 3.232274155708315e-08, + "loss": 0.5388, + "step": 13597 + }, + { + "epoch": 0.96, + "grad_norm": 0.6388501041058647, + "learning_rate": 3.219241671804707e-08, + "loss": 0.4284, + "step": 13598 + }, + { + "epoch": 0.97, + "grad_norm": 1.9173507737396176, + "learning_rate": 3.206235429544069e-08, + "loss": 0.5276, + "step": 13599 + }, + { + "epoch": 0.97, + "grad_norm": 1.9950124470494812, + "learning_rate": 3.193255429613407e-08, + "loss": 0.5455, + "step": 13600 + }, + { + "epoch": 0.97, + "grad_norm": 2.6266415874671907, + "learning_rate": 3.1803016726983936e-08, + "loss": 0.556, + "step": 13601 + }, + { + "epoch": 0.97, + "grad_norm": 0.6719188961522228, + "learning_rate": 3.1673741594833165e-08, + "loss": 0.4158, + "step": 13602 + }, + { + "epoch": 0.97, + "grad_norm": 0.7311484360621182, + "learning_rate": 3.154472890651072e-08, + "loss": 0.4124, + "step": 13603 + }, + { + "epoch": 0.97, + "grad_norm": 1.599198582280624, + "learning_rate": 3.141597866883117e-08, + "loss": 0.4779, + "step": 13604 + }, + { + "epoch": 0.97, + "grad_norm": 1.6740225150508983, + "learning_rate": 3.128749088859517e-08, + "loss": 0.4813, + "step": 13605 + }, + { + "epoch": 0.97, + "grad_norm": 1.6566542992114166, + "learning_rate": 3.115926557259008e-08, + "loss": 0.4857, + "step": 13606 + }, + { + "epoch": 0.97, + "grad_norm": 1.570729639738782, + "learning_rate": 3.103130272758936e-08, + "loss": 0.5054, + "step": 13607 + }, + { + "epoch": 0.97, + "grad_norm": 1.6772865886331187, + "learning_rate": 3.0903602360352613e-08, + "loss": 0.4752, + "step": 13608 + }, + { + "epoch": 0.97, + "grad_norm": 1.7484575967352922, + "learning_rate": 3.0776164477624994e-08, + "loss": 0.4824, + "step": 13609 + }, + { + "epoch": 0.97, + "grad_norm": 1.6022771260224067, + "learning_rate": 3.064898908613834e-08, + "loss": 0.4974, + "step": 13610 + }, + { + "epoch": 0.97, + "grad_norm": 1.5108438787217773, + "learning_rate": 3.0522076192610605e-08, + "loss": 0.4791, + "step": 13611 + }, + { + "epoch": 0.97, + "grad_norm": 0.6605534377592105, + "learning_rate": 3.039542580374588e-08, + "loss": 0.4043, + "step": 13612 + }, + { + "epoch": 0.97, + "grad_norm": 0.7212945818309867, + "learning_rate": 3.026903792623381e-08, + "loss": 0.4343, + "step": 13613 + }, + { + "epoch": 0.97, + "grad_norm": 1.6912877547802512, + "learning_rate": 3.014291256675128e-08, + "loss": 0.5168, + "step": 13614 + }, + { + "epoch": 0.97, + "grad_norm": 1.8620807891718332, + "learning_rate": 3.001704973196018e-08, + "loss": 0.5582, + "step": 13615 + }, + { + "epoch": 0.97, + "grad_norm": 1.7050529085162809, + "learning_rate": 2.989144942850852e-08, + "loss": 0.5593, + "step": 13616 + }, + { + "epoch": 0.97, + "grad_norm": 4.322949221548623, + "learning_rate": 2.9766111663032115e-08, + "loss": 0.4778, + "step": 13617 + }, + { + "epoch": 0.97, + "grad_norm": 1.8020855254567174, + "learning_rate": 2.9641036442151217e-08, + "loss": 0.5149, + "step": 13618 + }, + { + "epoch": 0.97, + "grad_norm": 1.5942868039249611, + "learning_rate": 2.9516223772472206e-08, + "loss": 0.5546, + "step": 13619 + }, + { + "epoch": 0.97, + "grad_norm": 1.6312543665024264, + "learning_rate": 2.9391673660588705e-08, + "loss": 0.5414, + "step": 13620 + }, + { + "epoch": 0.97, + "grad_norm": 2.478712385208612, + "learning_rate": 2.9267386113079888e-08, + "loss": 0.5105, + "step": 13621 + }, + { + "epoch": 0.97, + "grad_norm": 1.8503865983330183, + "learning_rate": 2.914336113651106e-08, + "loss": 0.4945, + "step": 13622 + }, + { + "epoch": 0.97, + "grad_norm": 2.0201830435343378, + "learning_rate": 2.9019598737433096e-08, + "loss": 0.5202, + "step": 13623 + }, + { + "epoch": 0.97, + "grad_norm": 1.9387429773907985, + "learning_rate": 2.8896098922384096e-08, + "loss": 0.5493, + "step": 13624 + }, + { + "epoch": 0.97, + "grad_norm": 1.9344569066301873, + "learning_rate": 2.877286169788718e-08, + "loss": 0.5093, + "step": 13625 + }, + { + "epoch": 0.97, + "grad_norm": 1.4473988234474753, + "learning_rate": 2.864988707045324e-08, + "loss": 0.4894, + "step": 13626 + }, + { + "epoch": 0.97, + "grad_norm": 1.988243036038166, + "learning_rate": 2.852717504657654e-08, + "loss": 0.5689, + "step": 13627 + }, + { + "epoch": 0.97, + "grad_norm": 1.7367526270529918, + "learning_rate": 2.8404725632740772e-08, + "loss": 0.4662, + "step": 13628 + }, + { + "epoch": 0.97, + "grad_norm": 1.73945726750156, + "learning_rate": 2.8282538835413542e-08, + "loss": 0.5079, + "step": 13629 + }, + { + "epoch": 0.97, + "grad_norm": 1.572319806330478, + "learning_rate": 2.816061466104858e-08, + "loss": 0.4485, + "step": 13630 + }, + { + "epoch": 0.97, + "grad_norm": 1.622408719602149, + "learning_rate": 2.8038953116087396e-08, + "loss": 0.5009, + "step": 13631 + }, + { + "epoch": 0.97, + "grad_norm": 1.5254061167801063, + "learning_rate": 2.791755420695541e-08, + "loss": 0.5748, + "step": 13632 + }, + { + "epoch": 0.97, + "grad_norm": 1.653291167903297, + "learning_rate": 2.7796417940066378e-08, + "loss": 0.5786, + "step": 13633 + }, + { + "epoch": 0.97, + "grad_norm": 1.4548676973965442, + "learning_rate": 2.7675544321818514e-08, + "loss": 0.4788, + "step": 13634 + }, + { + "epoch": 0.97, + "grad_norm": 1.5287913983949086, + "learning_rate": 2.7554933358597267e-08, + "loss": 0.5556, + "step": 13635 + }, + { + "epoch": 0.97, + "grad_norm": 1.806706186799095, + "learning_rate": 2.74345850567731e-08, + "loss": 0.562, + "step": 13636 + }, + { + "epoch": 0.97, + "grad_norm": 1.6652886890732341, + "learning_rate": 2.7314499422703146e-08, + "loss": 0.4705, + "step": 13637 + }, + { + "epoch": 0.97, + "grad_norm": 2.7546842854479214, + "learning_rate": 2.7194676462731772e-08, + "loss": 0.4549, + "step": 13638 + }, + { + "epoch": 0.97, + "grad_norm": 1.6199775439149924, + "learning_rate": 2.7075116183187256e-08, + "loss": 0.5554, + "step": 13639 + }, + { + "epoch": 0.97, + "grad_norm": 1.6946665625084318, + "learning_rate": 2.6955818590385652e-08, + "loss": 0.5218, + "step": 13640 + }, + { + "epoch": 0.97, + "grad_norm": 1.5588065002615186, + "learning_rate": 2.6836783690629142e-08, + "loss": 0.5397, + "step": 13641 + }, + { + "epoch": 0.97, + "grad_norm": 2.713855278728243, + "learning_rate": 2.671801149020492e-08, + "loss": 0.4988, + "step": 13642 + }, + { + "epoch": 0.97, + "grad_norm": 1.8770952322963037, + "learning_rate": 2.6599501995386857e-08, + "loss": 0.4687, + "step": 13643 + }, + { + "epoch": 0.97, + "grad_norm": 1.748130952268625, + "learning_rate": 2.64812552124355e-08, + "loss": 0.5558, + "step": 13644 + }, + { + "epoch": 0.97, + "grad_norm": 1.6037063936991014, + "learning_rate": 2.636327114759696e-08, + "loss": 0.4643, + "step": 13645 + }, + { + "epoch": 0.97, + "grad_norm": 1.85709122912522, + "learning_rate": 2.624554980710292e-08, + "loss": 0.5483, + "step": 13646 + }, + { + "epoch": 0.97, + "grad_norm": 2.2597091627055925, + "learning_rate": 2.6128091197172856e-08, + "loss": 0.476, + "step": 13647 + }, + { + "epoch": 0.97, + "grad_norm": 1.7227582884415344, + "learning_rate": 2.6010895324010133e-08, + "loss": 0.4833, + "step": 13648 + }, + { + "epoch": 0.97, + "grad_norm": 1.4727964472970885, + "learning_rate": 2.589396219380702e-08, + "loss": 0.4928, + "step": 13649 + }, + { + "epoch": 0.97, + "grad_norm": 1.9138748010218598, + "learning_rate": 2.577729181273858e-08, + "loss": 0.5406, + "step": 13650 + }, + { + "epoch": 0.97, + "grad_norm": 1.7419112843146773, + "learning_rate": 2.5660884186968772e-08, + "loss": 0.5681, + "step": 13651 + }, + { + "epoch": 0.97, + "grad_norm": 1.5708899626292299, + "learning_rate": 2.554473932264656e-08, + "loss": 0.5018, + "step": 13652 + }, + { + "epoch": 0.97, + "grad_norm": 1.6817664049731726, + "learning_rate": 2.5428857225907045e-08, + "loss": 0.5086, + "step": 13653 + }, + { + "epoch": 0.97, + "grad_norm": 1.7819320293186123, + "learning_rate": 2.5313237902871436e-08, + "loss": 0.5608, + "step": 13654 + }, + { + "epoch": 0.97, + "grad_norm": 2.1301992738620137, + "learning_rate": 2.5197881359646514e-08, + "loss": 0.4853, + "step": 13655 + }, + { + "epoch": 0.97, + "grad_norm": 0.6575275659428215, + "learning_rate": 2.5082787602327407e-08, + "loss": 0.4274, + "step": 13656 + }, + { + "epoch": 0.97, + "grad_norm": 1.6153340831873553, + "learning_rate": 2.496795663699203e-08, + "loss": 0.5706, + "step": 13657 + }, + { + "epoch": 0.97, + "grad_norm": 1.8417143745977065, + "learning_rate": 2.4853388469707197e-08, + "loss": 0.4033, + "step": 13658 + }, + { + "epoch": 0.97, + "grad_norm": 1.6756737049895802, + "learning_rate": 2.473908310652473e-08, + "loss": 0.5476, + "step": 13659 + }, + { + "epoch": 0.97, + "grad_norm": 0.6580460181718044, + "learning_rate": 2.4625040553482026e-08, + "loss": 0.4198, + "step": 13660 + }, + { + "epoch": 0.97, + "grad_norm": 1.5232498454151047, + "learning_rate": 2.451126081660371e-08, + "loss": 0.4603, + "step": 13661 + }, + { + "epoch": 0.97, + "grad_norm": 1.6833046990712928, + "learning_rate": 2.439774390189942e-08, + "loss": 0.5219, + "step": 13662 + }, + { + "epoch": 0.97, + "grad_norm": 1.6865603854468418, + "learning_rate": 2.4284489815366575e-08, + "loss": 0.4891, + "step": 13663 + }, + { + "epoch": 0.97, + "grad_norm": 1.706168157501822, + "learning_rate": 2.4171498562986505e-08, + "loss": 0.4987, + "step": 13664 + }, + { + "epoch": 0.97, + "grad_norm": 1.5970857477745983, + "learning_rate": 2.4058770150728884e-08, + "loss": 0.4845, + "step": 13665 + }, + { + "epoch": 0.97, + "grad_norm": 1.6907244955922287, + "learning_rate": 2.3946304584547276e-08, + "loss": 0.5631, + "step": 13666 + }, + { + "epoch": 0.97, + "grad_norm": 1.7663907458663923, + "learning_rate": 2.383410187038304e-08, + "loss": 0.4883, + "step": 13667 + }, + { + "epoch": 0.97, + "grad_norm": 1.9361286545402132, + "learning_rate": 2.3722162014163662e-08, + "loss": 0.49, + "step": 13668 + }, + { + "epoch": 0.97, + "grad_norm": 2.2112166733851497, + "learning_rate": 2.3610485021800524e-08, + "loss": 0.5578, + "step": 13669 + }, + { + "epoch": 0.97, + "grad_norm": 1.5619156316025222, + "learning_rate": 2.349907089919501e-08, + "loss": 0.4851, + "step": 13670 + }, + { + "epoch": 0.97, + "grad_norm": 1.7106082931088564, + "learning_rate": 2.338791965223075e-08, + "loss": 0.5019, + "step": 13671 + }, + { + "epoch": 0.97, + "grad_norm": 1.751928551104829, + "learning_rate": 2.3277031286779717e-08, + "loss": 0.5528, + "step": 13672 + }, + { + "epoch": 0.97, + "grad_norm": 1.709930774002848, + "learning_rate": 2.3166405808699443e-08, + "loss": 0.5254, + "step": 13673 + }, + { + "epoch": 0.97, + "grad_norm": 1.6800445667081099, + "learning_rate": 2.3056043223833034e-08, + "loss": 0.5276, + "step": 13674 + }, + { + "epoch": 0.97, + "grad_norm": 1.5502393428718613, + "learning_rate": 2.2945943538010828e-08, + "loss": 0.4938, + "step": 13675 + }, + { + "epoch": 0.97, + "grad_norm": 1.9354817129188846, + "learning_rate": 2.283610675704817e-08, + "loss": 0.5225, + "step": 13676 + }, + { + "epoch": 0.97, + "grad_norm": 1.9833620713634694, + "learning_rate": 2.2726532886748197e-08, + "loss": 0.6167, + "step": 13677 + }, + { + "epoch": 0.97, + "grad_norm": 1.5442226917866675, + "learning_rate": 2.2617221932897393e-08, + "loss": 0.5096, + "step": 13678 + }, + { + "epoch": 0.97, + "grad_norm": 0.7277016520126909, + "learning_rate": 2.250817390127058e-08, + "loss": 0.4107, + "step": 13679 + }, + { + "epoch": 0.97, + "grad_norm": 2.109429676350748, + "learning_rate": 2.239938879762815e-08, + "loss": 0.4945, + "step": 13680 + }, + { + "epoch": 0.97, + "grad_norm": 1.8235380575146225, + "learning_rate": 2.2290866627716623e-08, + "loss": 0.5078, + "step": 13681 + }, + { + "epoch": 0.97, + "grad_norm": 1.5900978282548284, + "learning_rate": 2.2182607397268075e-08, + "loss": 0.455, + "step": 13682 + }, + { + "epoch": 0.97, + "grad_norm": 0.6757511222176741, + "learning_rate": 2.2074611112001267e-08, + "loss": 0.4236, + "step": 13683 + }, + { + "epoch": 0.97, + "grad_norm": 1.6008895925771716, + "learning_rate": 2.1966877777621077e-08, + "loss": 0.5155, + "step": 13684 + }, + { + "epoch": 0.97, + "grad_norm": 2.7898676850002455, + "learning_rate": 2.185940739981851e-08, + "loss": 0.4895, + "step": 13685 + }, + { + "epoch": 0.97, + "grad_norm": 1.4206818297431292, + "learning_rate": 2.1752199984270138e-08, + "loss": 0.4627, + "step": 13686 + }, + { + "epoch": 0.97, + "grad_norm": 1.7906074333581887, + "learning_rate": 2.164525553663921e-08, + "loss": 0.515, + "step": 13687 + }, + { + "epoch": 0.97, + "grad_norm": 1.670554487378473, + "learning_rate": 2.1538574062574534e-08, + "loss": 0.5979, + "step": 13688 + }, + { + "epoch": 0.97, + "grad_norm": 1.972688988628833, + "learning_rate": 2.1432155567712166e-08, + "loss": 0.4947, + "step": 13689 + }, + { + "epoch": 0.97, + "grad_norm": 1.7769306894580654, + "learning_rate": 2.1326000057672602e-08, + "loss": 0.5451, + "step": 13690 + }, + { + "epoch": 0.97, + "grad_norm": 1.7685589014128131, + "learning_rate": 2.122010753806414e-08, + "loss": 0.5544, + "step": 13691 + }, + { + "epoch": 0.97, + "grad_norm": 1.7475246776912268, + "learning_rate": 2.1114478014479522e-08, + "loss": 0.5995, + "step": 13692 + }, + { + "epoch": 0.97, + "grad_norm": 1.8923773704215427, + "learning_rate": 2.1009111492499845e-08, + "loss": 0.5089, + "step": 13693 + }, + { + "epoch": 0.97, + "grad_norm": 1.983006889062014, + "learning_rate": 2.090400797768899e-08, + "loss": 0.5343, + "step": 13694 + }, + { + "epoch": 0.97, + "grad_norm": 1.537101402374633, + "learning_rate": 2.079916747560029e-08, + "loss": 0.505, + "step": 13695 + }, + { + "epoch": 0.97, + "grad_norm": 1.6560284380741874, + "learning_rate": 2.069458999177154e-08, + "loss": 0.5013, + "step": 13696 + }, + { + "epoch": 0.97, + "grad_norm": 2.1106066738763447, + "learning_rate": 2.0590275531726656e-08, + "loss": 0.5415, + "step": 13697 + }, + { + "epoch": 0.97, + "grad_norm": 2.1157674101543216, + "learning_rate": 2.0486224100976228e-08, + "loss": 0.504, + "step": 13698 + }, + { + "epoch": 0.97, + "grad_norm": 1.7423651479687947, + "learning_rate": 2.0382435705015856e-08, + "loss": 0.5322, + "step": 13699 + }, + { + "epoch": 0.97, + "grad_norm": 1.6354017143892279, + "learning_rate": 2.027891034932894e-08, + "loss": 0.513, + "step": 13700 + }, + { + "epoch": 0.97, + "grad_norm": 1.7121944222048529, + "learning_rate": 2.017564803938332e-08, + "loss": 0.47, + "step": 13701 + }, + { + "epoch": 0.97, + "grad_norm": 1.6820944982391355, + "learning_rate": 2.0072648780634085e-08, + "loss": 0.5193, + "step": 13702 + }, + { + "epoch": 0.97, + "grad_norm": 2.2057795555035042, + "learning_rate": 1.9969912578521324e-08, + "loss": 0.5199, + "step": 13703 + }, + { + "epoch": 0.97, + "grad_norm": 1.81671289194695, + "learning_rate": 1.9867439438472914e-08, + "loss": 0.5321, + "step": 13704 + }, + { + "epoch": 0.97, + "grad_norm": 1.765450916375056, + "learning_rate": 1.9765229365901195e-08, + "loss": 0.5128, + "step": 13705 + }, + { + "epoch": 0.97, + "grad_norm": 1.571612705940576, + "learning_rate": 1.9663282366205737e-08, + "loss": 0.5008, + "step": 13706 + }, + { + "epoch": 0.97, + "grad_norm": 2.1366950233804265, + "learning_rate": 1.9561598444771125e-08, + "loss": 0.5923, + "step": 13707 + }, + { + "epoch": 0.97, + "grad_norm": 2.447865014207708, + "learning_rate": 1.946017760696861e-08, + "loss": 0.5672, + "step": 13708 + }, + { + "epoch": 0.97, + "grad_norm": 2.032117564033243, + "learning_rate": 1.935901985815669e-08, + "loss": 0.5649, + "step": 13709 + }, + { + "epoch": 0.97, + "grad_norm": 1.6241708649943962, + "learning_rate": 1.92581252036772e-08, + "loss": 0.4436, + "step": 13710 + }, + { + "epoch": 0.97, + "grad_norm": 0.6684570481476297, + "learning_rate": 1.915749364886088e-08, + "loss": 0.4359, + "step": 13711 + }, + { + "epoch": 0.97, + "grad_norm": 1.6267207264791732, + "learning_rate": 1.9057125199023474e-08, + "loss": 0.4982, + "step": 13712 + }, + { + "epoch": 0.97, + "grad_norm": 1.8452156479085255, + "learning_rate": 1.895701985946574e-08, + "loss": 0.5314, + "step": 13713 + }, + { + "epoch": 0.97, + "grad_norm": 1.5888422535011206, + "learning_rate": 1.8857177635476786e-08, + "loss": 0.5659, + "step": 13714 + }, + { + "epoch": 0.97, + "grad_norm": 1.8590122147672654, + "learning_rate": 1.8757598532330167e-08, + "loss": 0.5783, + "step": 13715 + }, + { + "epoch": 0.97, + "grad_norm": 1.6007233057969275, + "learning_rate": 1.865828255528612e-08, + "loss": 0.5324, + "step": 13716 + }, + { + "epoch": 0.97, + "grad_norm": 1.6651651929023643, + "learning_rate": 1.8559229709589898e-08, + "loss": 0.5501, + "step": 13717 + }, + { + "epoch": 0.97, + "grad_norm": 2.374049367068231, + "learning_rate": 1.846044000047509e-08, + "loss": 0.5583, + "step": 13718 + }, + { + "epoch": 0.97, + "grad_norm": 1.4363236054512196, + "learning_rate": 1.836191343315974e-08, + "loss": 0.4595, + "step": 13719 + }, + { + "epoch": 0.97, + "grad_norm": 0.7756100392365685, + "learning_rate": 1.8263650012848022e-08, + "loss": 0.413, + "step": 13720 + }, + { + "epoch": 0.97, + "grad_norm": 2.2459010448598997, + "learning_rate": 1.8165649744730785e-08, + "loss": 0.5276, + "step": 13721 + }, + { + "epoch": 0.97, + "grad_norm": 1.620424034689471, + "learning_rate": 1.8067912633984443e-08, + "loss": 0.5051, + "step": 13722 + }, + { + "epoch": 0.97, + "grad_norm": 1.7791096773686077, + "learning_rate": 1.797043868577264e-08, + "loss": 0.5145, + "step": 13723 + }, + { + "epoch": 0.97, + "grad_norm": 2.1915913009589794, + "learning_rate": 1.7873227905243483e-08, + "loss": 0.5551, + "step": 13724 + }, + { + "epoch": 0.97, + "grad_norm": 2.0244974723429805, + "learning_rate": 1.7776280297531757e-08, + "loss": 0.5235, + "step": 13725 + }, + { + "epoch": 0.97, + "grad_norm": 1.7637481779396387, + "learning_rate": 1.767959586775947e-08, + "loss": 0.5693, + "step": 13726 + }, { "epoch": 0.97, - "grad_norm": 2.2888006379470407, - "learning_rate": 1.7996935859486143e-08, - "loss": 0.5377, - "step": 9254 + "grad_norm": 1.9058907731768433, + "learning_rate": 1.7583174621033094e-08, + "loss": 0.5645, + "step": 13727 }, { "epoch": 0.97, - "grad_norm": 0.95190793167679, - "learning_rate": 1.7852758337904564e-08, - "loss": 0.5234, - "step": 9255 + "grad_norm": 1.7514230814994545, + "learning_rate": 1.7487016562446336e-08, + "loss": 0.5098, + "step": 13728 }, { "epoch": 0.97, - "grad_norm": 3.928375135216635, - "learning_rate": 1.7709159627787853e-08, - "loss": 0.5789, - "step": 9256 + "grad_norm": 1.7959362357364308, + "learning_rate": 1.7391121697077906e-08, + "loss": 0.5861, + "step": 13729 }, { "epoch": 0.97, - "grad_norm": 2.772545157709395, - "learning_rate": 1.7566139745818778e-08, - "loss": 0.6105, - "step": 9257 + "grad_norm": 1.8874699203887477, + "learning_rate": 1.729549002999431e-08, + "loss": 0.5855, + "step": 13730 }, { "epoch": 0.97, - "grad_norm": 0.9706682161569412, - "learning_rate": 1.7423698708612935e-08, - "loss": 0.5673, - "step": 9258 + "grad_norm": 1.7585724877207944, + "learning_rate": 1.720012156624651e-08, + "loss": 0.5312, + "step": 13731 }, { "epoch": 0.97, - "grad_norm": 2.5143022423925516, - "learning_rate": 1.72818365327182e-08, - "loss": 0.5667, - "step": 9259 + "grad_norm": 0.6764485153053917, + "learning_rate": 1.7105016310872135e-08, + "loss": 0.3926, + "step": 13732 }, { "epoch": 0.97, - "grad_norm": 3.5152224135253496, - "learning_rate": 1.7140553234616385e-08, - "loss": 0.5852, - "step": 9260 + "grad_norm": 1.740025805559989, + "learning_rate": 1.7010174268895507e-08, + "loss": 0.4328, + "step": 13733 }, { "epoch": 0.97, - "grad_norm": 2.6502532483318486, - "learning_rate": 1.6999848830721033e-08, - "loss": 0.585, - "step": 9261 + "grad_norm": 1.6003553315143626, + "learning_rate": 1.6915595445325948e-08, + "loss": 0.5345, + "step": 13734 }, { "epoch": 0.97, - "grad_norm": 2.643359676277189, - "learning_rate": 1.6859723337379064e-08, - "loss": 0.5682, - "step": 9262 + "grad_norm": 1.5939040396003548, + "learning_rate": 1.682127984516002e-08, + "loss": 0.5639, + "step": 13735 }, { "epoch": 0.97, - "grad_norm": 2.073844785345583, - "learning_rate": 1.6720176770869124e-08, - "loss": 0.6263, - "step": 9263 + "grad_norm": 1.5684646424583175, + "learning_rate": 1.6727227473378737e-08, + "loss": 0.5371, + "step": 13736 }, { "epoch": 0.97, - "grad_norm": 4.2055735703872275, - "learning_rate": 1.6581209147404355e-08, - "loss": 0.6097, - "step": 9264 + "grad_norm": 1.612242701939781, + "learning_rate": 1.6633438334951458e-08, + "loss": 0.5254, + "step": 13737 }, { "epoch": 0.97, - "grad_norm": 2.6043559102032106, - "learning_rate": 1.6442820483128508e-08, - "loss": 0.6575, - "step": 9265 + "grad_norm": 1.6283067535066, + "learning_rate": 1.653991243483144e-08, + "loss": 0.5396, + "step": 13738 + }, + { + "epoch": 0.97, + "grad_norm": 1.6102057212694685, + "learning_rate": 1.6446649777959732e-08, + "loss": 0.4473, + "step": 13739 }, { "epoch": 0.98, - "grad_norm": 2.1311347692762466, - "learning_rate": 1.630501079412039e-08, - "loss": 0.5999, - "step": 9266 + "grad_norm": 1.6069983102856222, + "learning_rate": 1.635365036926295e-08, + "loss": 0.5575, + "step": 13740 }, { "epoch": 0.98, - "grad_norm": 3.1904074031842913, - "learning_rate": 1.6167780096389417e-08, - "loss": 0.6478, - "step": 9267 + "grad_norm": 1.6081285848299098, + "learning_rate": 1.6260914213652723e-08, + "loss": 0.4573, + "step": 13741 }, { "epoch": 0.98, - "grad_norm": 2.1605938131759217, - "learning_rate": 1.603112840587895e-08, - "loss": 0.5674, - "step": 9268 + "grad_norm": 2.239849892582331, + "learning_rate": 1.616844131602846e-08, + "loss": 0.4946, + "step": 13742 }, { "epoch": 0.98, - "grad_norm": 2.7661197207313495, - "learning_rate": 1.589505573846517e-08, - "loss": 0.6057, - "step": 9269 + "grad_norm": 0.6316326425812798, + "learning_rate": 1.607623168127459e-08, + "loss": 0.4129, + "step": 13743 }, { "epoch": 0.98, - "grad_norm": 2.6447415105460883, - "learning_rate": 1.5759562109955993e-08, - "loss": 0.6398, - "step": 9270 + "grad_norm": 1.6160584751180578, + "learning_rate": 1.5984285314262214e-08, + "loss": 0.547, + "step": 13744 }, { "epoch": 0.98, - "grad_norm": 3.2687696784623625, - "learning_rate": 1.562464753609272e-08, - "loss": 0.6534, - "step": 9271 + "grad_norm": 1.7355194362348485, + "learning_rate": 1.5892602219847452e-08, + "loss": 0.4856, + "step": 13745 }, { "epoch": 0.98, - "grad_norm": 2.6228867844890065, - "learning_rate": 1.549031203254947e-08, - "loss": 0.627, - "step": 9272 + "grad_norm": 1.696070800839325, + "learning_rate": 1.580118240287476e-08, + "loss": 0.5461, + "step": 13746 }, { "epoch": 0.98, - "grad_norm": 3.4312244311880393, - "learning_rate": 1.535655561493321e-08, - "loss": 0.6083, - "step": 9273 + "grad_norm": 0.6889008900892428, + "learning_rate": 1.571002586817194e-08, + "loss": 0.4076, + "step": 13747 }, { "epoch": 0.98, - "grad_norm": 2.4986020292339344, - "learning_rate": 1.5223378298783174e-08, - "loss": 0.5576, - "step": 9274 + "grad_norm": 1.6531855458470746, + "learning_rate": 1.5619132620554034e-08, + "loss": 0.5022, + "step": 13748 }, { "epoch": 0.98, - "grad_norm": 2.519385627634563, - "learning_rate": 1.5090780099571435e-08, - "loss": 0.5539, - "step": 9275 + "grad_norm": 3.028031633821661, + "learning_rate": 1.5528502664823865e-08, + "loss": 0.5417, + "step": 13749 }, { "epoch": 0.98, - "grad_norm": 9.883104133354655, - "learning_rate": 1.4958761032702885e-08, - "loss": 0.5969, - "step": 9276 + "grad_norm": 2.241664044279924, + "learning_rate": 1.5438136005767602e-08, + "loss": 0.5637, + "step": 13750 }, { "epoch": 0.98, - "grad_norm": 2.536493973434565, - "learning_rate": 1.4827321113515259e-08, - "loss": 0.6409, - "step": 9277 + "grad_norm": 1.5859081770093417, + "learning_rate": 1.5348032648159206e-08, + "loss": 0.5727, + "step": 13751 }, { "epoch": 0.98, - "grad_norm": 2.276130559735301, - "learning_rate": 1.4696460357279118e-08, - "loss": 0.5695, - "step": 9278 + "grad_norm": 2.142684694829177, + "learning_rate": 1.5258192596757093e-08, + "loss": 0.5505, + "step": 13752 }, { "epoch": 0.98, - "grad_norm": 2.7168915784835974, - "learning_rate": 1.4566178779197305e-08, - "loss": 0.5578, - "step": 9279 + "grad_norm": 1.7521103017933197, + "learning_rate": 1.516861585630913e-08, + "loss": 0.5308, + "step": 13753 }, { "epoch": 0.98, - "grad_norm": 2.303485469832306, - "learning_rate": 1.443647639440493e-08, - "loss": 0.619, - "step": 9280 + "grad_norm": 1.7025776063386364, + "learning_rate": 1.5079302431544873e-08, + "loss": 0.5051, + "step": 13754 }, { "epoch": 0.98, - "grad_norm": 2.7280656636349336, - "learning_rate": 1.43073532179705e-08, - "loss": 0.7012, - "step": 9281 + "grad_norm": 1.6441056454535075, + "learning_rate": 1.499025232718332e-08, + "loss": 0.4834, + "step": 13755 }, { "epoch": 0.98, - "grad_norm": 2.803606301743524, - "learning_rate": 1.4178809264896454e-08, - "loss": 0.5857, - "step": 9282 + "grad_norm": 1.556282959281427, + "learning_rate": 1.4901465547928483e-08, + "loss": 0.4515, + "step": 13756 }, { "epoch": 0.98, - "grad_norm": 5.551430987761346, - "learning_rate": 1.4050844550115295e-08, - "loss": 0.6231, - "step": 9283 + "grad_norm": 2.063678524679631, + "learning_rate": 1.4812942098469396e-08, + "loss": 0.5409, + "step": 13757 }, { "epoch": 0.98, - "grad_norm": 3.3909002445584813, - "learning_rate": 1.3923459088494574e-08, - "loss": 0.5716, - "step": 9284 + "grad_norm": 1.6124437472435278, + "learning_rate": 1.4724681983483424e-08, + "loss": 0.5618, + "step": 13758 }, { "epoch": 0.98, - "grad_norm": 2.489847512775473, - "learning_rate": 1.3796652894832452e-08, - "loss": 0.6399, - "step": 9285 + "grad_norm": 1.5112333205867117, + "learning_rate": 1.463668520763184e-08, + "loss": 0.5119, + "step": 13759 }, { "epoch": 0.98, - "grad_norm": 1.8687603509839055, - "learning_rate": 1.367042598386159e-08, + "grad_norm": 1.8516740082344818, + "learning_rate": 1.4548951775563703e-08, "loss": 0.5983, - "step": 9286 + "step": 13760 }, { "epoch": 0.98, - "grad_norm": 2.603423975622383, - "learning_rate": 1.3544778370246924e-08, - "loss": 0.6098, - "step": 9287 + "grad_norm": 1.8769304485977745, + "learning_rate": 1.4461481691912527e-08, + "loss": 0.5311, + "step": 13761 }, { "epoch": 0.98, - "grad_norm": 2.520998350761481, - "learning_rate": 1.3419710068585668e-08, - "loss": 0.5934, - "step": 9288 + "grad_norm": 1.8802250547184778, + "learning_rate": 1.4374274961299062e-08, + "loss": 0.5204, + "step": 13762 }, { "epoch": 0.98, - "grad_norm": 4.649199879398089, - "learning_rate": 1.3295221093407862e-08, - "loss": 0.6294, - "step": 9289 + "grad_norm": 2.1710117584091067, + "learning_rate": 1.4287331588330178e-08, + "loss": 0.5149, + "step": 13763 }, { "epoch": 0.98, - "grad_norm": 3.3152207409028662, - "learning_rate": 1.3171311459175829e-08, - "loss": 0.6102, - "step": 9290 + "grad_norm": 2.091972570829777, + "learning_rate": 1.4200651577598312e-08, + "loss": 0.4941, + "step": 13764 }, { "epoch": 0.98, - "grad_norm": 8.91183070336439, - "learning_rate": 1.3047981180285274e-08, - "loss": 0.5617, - "step": 9291 + "grad_norm": 1.6682376636246679, + "learning_rate": 1.411423493368258e-08, + "loss": 0.6043, + "step": 13765 }, { "epoch": 0.98, - "grad_norm": 2.536923939716204, - "learning_rate": 1.2925230271064736e-08, - "loss": 0.6005, - "step": 9292 + "grad_norm": 2.067810273698281, + "learning_rate": 1.4028081661147108e-08, + "loss": 0.522, + "step": 13766 }, { "epoch": 0.98, - "grad_norm": 2.5935899938613978, - "learning_rate": 1.2803058745774477e-08, - "loss": 0.5446, - "step": 9293 + "grad_norm": 2.236525920051452, + "learning_rate": 1.3942191764543255e-08, + "loss": 0.5641, + "step": 13767 }, { "epoch": 0.98, - "grad_norm": 2.67588488131424, - "learning_rate": 1.2681466618608696e-08, - "loss": 0.6637, - "step": 9294 + "grad_norm": 1.6859805640907688, + "learning_rate": 1.385656524840795e-08, + "loss": 0.5591, + "step": 13768 }, { "epoch": 0.98, - "grad_norm": 2.298948783588412, - "learning_rate": 1.256045390369276e-08, - "loss": 0.5459, - "step": 9295 + "grad_norm": 1.7206930308220043, + "learning_rate": 1.3771202117264237e-08, + "loss": 0.4978, + "step": 13769 }, { "epoch": 0.98, - "grad_norm": 3.32146484587026, - "learning_rate": 1.2440020615086534e-08, - "loss": 0.6503, - "step": 9296 + "grad_norm": 1.6164930876480414, + "learning_rate": 1.368610237562129e-08, + "loss": 0.4979, + "step": 13770 }, { "epoch": 0.98, - "grad_norm": 2.382979182133977, - "learning_rate": 1.232016676678105e-08, - "loss": 0.5351, - "step": 9297 + "grad_norm": 1.6941596001084085, + "learning_rate": 1.3601266027973848e-08, + "loss": 0.5456, + "step": 13771 }, { "epoch": 0.98, - "grad_norm": 2.8108107342818927, - "learning_rate": 1.2200892372700168e-08, - "loss": 0.5628, - "step": 9298 + "grad_norm": 1.8742478864970795, + "learning_rate": 1.3516693078804432e-08, + "loss": 0.5161, + "step": 13772 }, { "epoch": 0.98, - "grad_norm": 2.5429167783954405, - "learning_rate": 1.2082197446701693e-08, - "loss": 0.5136, - "step": 9299 + "grad_norm": 1.6533949546834341, + "learning_rate": 1.3432383532579474e-08, + "loss": 0.5342, + "step": 13773 }, { "epoch": 0.98, - "grad_norm": 2.446935675654442, - "learning_rate": 1.196408200257515e-08, - "loss": 0.5591, - "step": 9300 + "grad_norm": 1.697619091249961, + "learning_rate": 1.334833739375263e-08, + "loss": 0.5169, + "step": 13774 }, { "epoch": 0.98, - "grad_norm": 2.4390888642246606, - "learning_rate": 1.1846546054042341e-08, - "loss": 0.63, - "step": 9301 + "grad_norm": 1.6500453690624288, + "learning_rate": 1.3264554666763685e-08, + "loss": 0.5578, + "step": 13775 }, { "epoch": 0.98, - "grad_norm": 3.182765263784984, - "learning_rate": 1.1729589614758452e-08, - "loss": 0.6281, - "step": 9302 + "grad_norm": 1.6164268654748593, + "learning_rate": 1.318103535603854e-08, + "loss": 0.5356, + "step": 13776 }, { "epoch": 0.98, - "grad_norm": 2.6515402589211248, - "learning_rate": 1.1613212698311504e-08, - "loss": 0.5422, - "step": 9303 + "grad_norm": 1.693258800491067, + "learning_rate": 1.3097779465989225e-08, + "loss": 0.5789, + "step": 13777 }, { "epoch": 0.98, - "grad_norm": 3.006167180494317, - "learning_rate": 1.1497415318221239e-08, - "loss": 0.5991, - "step": 9304 + "grad_norm": 0.7027221266563234, + "learning_rate": 1.3014787001012219e-08, + "loss": 0.4478, + "step": 13778 }, { "epoch": 0.98, - "grad_norm": 2.6423248229678973, - "learning_rate": 1.1382197487941337e-08, - "loss": 0.5174, - "step": 9305 + "grad_norm": 1.871834528035164, + "learning_rate": 1.2932057965492905e-08, + "loss": 0.605, + "step": 13779 }, { "epoch": 0.98, - "grad_norm": 2.713564676817919, - "learning_rate": 1.1267559220857204e-08, - "loss": 0.6037, - "step": 9306 + "grad_norm": 1.567817924211742, + "learning_rate": 1.2849592363801122e-08, + "loss": 0.516, + "step": 13780 }, { "epoch": 0.98, - "grad_norm": 2.653891032493835, - "learning_rate": 1.1153500530286521e-08, - "loss": 0.5838, - "step": 9307 + "grad_norm": 1.8229754701452192, + "learning_rate": 1.2767390200292274e-08, + "loss": 0.4755, + "step": 13781 }, { "epoch": 0.98, - "grad_norm": 3.3097916904344333, - "learning_rate": 1.1040021429480907e-08, - "loss": 0.5577, - "step": 9308 + "grad_norm": 0.7349779189445561, + "learning_rate": 1.2685451479308442e-08, + "loss": 0.4146, + "step": 13782 }, { "epoch": 0.98, - "grad_norm": 2.68913204932153, - "learning_rate": 1.0927121931624263e-08, - "loss": 0.6034, - "step": 9309 + "grad_norm": 1.6602550782095997, + "learning_rate": 1.2603776205178941e-08, + "loss": 0.5004, + "step": 13783 }, { "epoch": 0.98, - "grad_norm": 3.6960977207308368, - "learning_rate": 1.0814802049832762e-08, - "loss": 0.5788, - "step": 9310 + "grad_norm": 1.6212497271851576, + "learning_rate": 1.2522364382217544e-08, + "loss": 0.4948, + "step": 13784 }, { "epoch": 0.98, - "grad_norm": 2.5549598391454404, - "learning_rate": 1.0703061797154857e-08, - "loss": 0.6632, - "step": 9311 + "grad_norm": 2.3892271486532097, + "learning_rate": 1.24412160147247e-08, + "loss": 0.5719, + "step": 13785 }, { "epoch": 0.98, - "grad_norm": 3.4964467940181576, - "learning_rate": 1.059190118657294e-08, - "loss": 0.5849, - "step": 9312 + "grad_norm": 1.7540980975702138, + "learning_rate": 1.2360331106986979e-08, + "loss": 0.5109, + "step": 13786 }, { "epoch": 0.98, - "grad_norm": 3.4191869666444825, - "learning_rate": 1.0481320231001124e-08, - "loss": 0.6446, - "step": 9313 + "grad_norm": 1.740212610434419, + "learning_rate": 1.2279709663277073e-08, + "loss": 0.4735, + "step": 13787 }, { "epoch": 0.98, - "grad_norm": 2.233757773597287, - "learning_rate": 1.0371318943285802e-08, - "loss": 0.5923, - "step": 9314 + "grad_norm": 1.7820749849831707, + "learning_rate": 1.21993516878538e-08, + "loss": 0.4826, + "step": 13788 }, { "epoch": 0.98, - "grad_norm": 3.207322331157825, - "learning_rate": 1.0261897336207305e-08, - "loss": 0.617, - "step": 9315 + "grad_norm": 0.7766033991733574, + "learning_rate": 1.2119257184960986e-08, + "loss": 0.4345, + "step": 13789 }, { "epoch": 0.98, - "grad_norm": 3.9505737873093274, - "learning_rate": 1.0153055422477686e-08, - "loss": 0.6109, - "step": 9316 + "grad_norm": 2.737395370239475, + "learning_rate": 1.2039426158830802e-08, + "loss": 0.4403, + "step": 13790 }, { "epoch": 0.98, - "grad_norm": 3.9526141318436947, - "learning_rate": 1.0044793214742387e-08, - "loss": 0.6762, - "step": 9317 + "grad_norm": 1.8670576133552697, + "learning_rate": 1.1959858613679875e-08, + "loss": 0.5055, + "step": 13791 }, { "epoch": 0.98, - "grad_norm": 3.42531517384415, - "learning_rate": 9.937110725578015e-09, - "loss": 0.6146, - "step": 9318 + "grad_norm": 1.6188737783961151, + "learning_rate": 1.18805545537104e-08, + "loss": 0.5188, + "step": 13792 + }, + { + "epoch": 0.98, + "grad_norm": 2.022036712738585, + "learning_rate": 1.1801513983112356e-08, + "loss": 0.5604, + "step": 13793 + }, + { + "epoch": 0.98, + "grad_norm": 1.5197650742101925, + "learning_rate": 1.1722736906060738e-08, + "loss": 0.4555, + "step": 13794 + }, + { + "epoch": 0.98, + "grad_norm": 1.821502823667854, + "learning_rate": 1.1644223326716663e-08, + "loss": 0.4335, + "step": 13795 + }, + { + "epoch": 0.98, + "grad_norm": 1.9506144089178716, + "learning_rate": 1.1565973249227924e-08, + "loss": 0.4837, + "step": 13796 + }, + { + "epoch": 0.98, + "grad_norm": 2.0832846143893833, + "learning_rate": 1.1487986677727326e-08, + "loss": 0.4801, + "step": 13797 + }, + { + "epoch": 0.98, + "grad_norm": 1.9942276950610516, + "learning_rate": 1.141026361633435e-08, + "loss": 0.5418, + "step": 13798 + }, + { + "epoch": 0.98, + "grad_norm": 2.219015473775589, + "learning_rate": 1.1332804069155156e-08, + "loss": 0.5032, + "step": 13799 }, { "epoch": 0.98, - "grad_norm": 2.143828176250109, - "learning_rate": 9.83000796749567e-09, + "grad_norm": 1.7275231383627365, + "learning_rate": 1.1255608040281473e-08, "loss": 0.5419, - "step": 9319 + "step": 13800 }, { "epoch": 0.98, - "grad_norm": 2.7349892039380914, - "learning_rate": 9.723484952937623e-09, - "loss": 0.6015, - "step": 9320 + "grad_norm": 1.7300486822606524, + "learning_rate": 1.117867553379004e-08, + "loss": 0.5255, + "step": 13801 }, { "epoch": 0.98, - "grad_norm": 2.880357843332127, - "learning_rate": 9.617541694279532e-09, - "loss": 0.5888, - "step": 9321 + "grad_norm": 1.6940142992592748, + "learning_rate": 1.1102006553745936e-08, + "loss": 0.5652, + "step": 13802 }, { "epoch": 0.98, - "grad_norm": 3.046184759076013, - "learning_rate": 9.512178203829881e-09, - "loss": 0.5733, - "step": 9322 + "grad_norm": 1.8182172858358334, + "learning_rate": 1.1025601104198702e-08, + "loss": 0.5749, + "step": 13803 }, { "epoch": 0.98, - "grad_norm": 2.0691863659915253, - "learning_rate": 9.407394493829436e-09, - "loss": 0.5912, - "step": 9323 + "grad_norm": 1.7914581734291233, + "learning_rate": 1.0949459189183442e-08, + "loss": 0.452, + "step": 13804 }, { "epoch": 0.98, - "grad_norm": 3.048172174777919, - "learning_rate": 9.303190576451237e-09, - "loss": 0.641, - "step": 9324 + "grad_norm": 1.9040933566529674, + "learning_rate": 1.0873580812723605e-08, + "loss": 0.5357, + "step": 13805 }, { "epoch": 0.98, - "grad_norm": 6.232433634613707, - "learning_rate": 9.199566463801712e-09, - "loss": 0.583, - "step": 9325 + "grad_norm": 1.7187671095964874, + "learning_rate": 1.0797965978826541e-08, + "loss": 0.5223, + "step": 13806 }, { "epoch": 0.98, - "grad_norm": 2.7792858367309243, - "learning_rate": 9.09652216792012e-09, - "loss": 0.6, - "step": 9326 + "grad_norm": 0.6170376708699634, + "learning_rate": 1.0722614691486832e-08, + "loss": 0.4004, + "step": 13807 }, { "epoch": 0.98, - "grad_norm": 3.136354810393291, - "learning_rate": 8.994057700776881e-09, - "loss": 0.5886, - "step": 9327 + "grad_norm": 1.7387855870329176, + "learning_rate": 1.0647526954684073e-08, + "loss": 0.5659, + "step": 13808 }, { "epoch": 0.98, - "grad_norm": 7.469179826762709, - "learning_rate": 8.892173074276921e-09, - "loss": 0.6292, - "step": 9328 + "grad_norm": 1.8794910765745756, + "learning_rate": 1.0572702772385645e-08, + "loss": 0.5344, + "step": 13809 }, { "epoch": 0.98, - "grad_norm": 2.404099135191639, - "learning_rate": 8.790868300255773e-09, - "loss": 0.6475, - "step": 9329 + "grad_norm": 2.446154487956777, + "learning_rate": 1.0498142148543388e-08, + "loss": 0.5256, + "step": 13810 }, { "epoch": 0.98, - "grad_norm": 3.296160070077247, - "learning_rate": 8.690143390484018e-09, - "loss": 0.6183, - "step": 9330 + "grad_norm": 1.4398546129368928, + "learning_rate": 1.0423845087095818e-08, + "loss": 0.4269, + "step": 13811 }, { "epoch": 0.98, - "grad_norm": 2.6296150848981825, - "learning_rate": 8.589998356662854e-09, - "loss": 0.6401, - "step": 9331 + "grad_norm": 1.7924797899392848, + "learning_rate": 1.0349811591967573e-08, + "loss": 0.5177, + "step": 13812 }, { "epoch": 0.98, - "grad_norm": 3.253263381499738, - "learning_rate": 8.490433210426862e-09, - "loss": 0.6144, - "step": 9332 + "grad_norm": 0.7202270786782777, + "learning_rate": 1.0276041667069968e-08, + "loss": 0.4169, + "step": 13813 }, { "epoch": 0.98, - "grad_norm": 3.110522658122041, - "learning_rate": 8.391447963343457e-09, - "loss": 0.5804, - "step": 9333 + "grad_norm": 1.708925492199874, + "learning_rate": 1.0202535316299334e-08, + "loss": 0.481, + "step": 13814 }, { "epoch": 0.98, - "grad_norm": 6.067607575219891, - "learning_rate": 8.293042626912328e-09, - "loss": 0.5936, - "step": 9334 + "grad_norm": 1.5330440619883268, + "learning_rate": 1.0129292543538115e-08, + "loss": 0.5065, + "step": 13815 }, { "epoch": 0.98, - "grad_norm": 2.5019568835203203, - "learning_rate": 8.195217212565998e-09, - "loss": 0.6324, - "step": 9335 + "grad_norm": 1.7823191972984158, + "learning_rate": 1.0056313352656e-08, + "loss": 0.4949, + "step": 13816 + }, + { + "epoch": 0.98, + "grad_norm": 2.6583394898306665, + "learning_rate": 9.983597747507679e-09, + "loss": 0.4923, + "step": 13817 + }, + { + "epoch": 0.98, + "grad_norm": 1.609245772614628, + "learning_rate": 9.911145731934524e-09, + "loss": 0.521, + "step": 13818 + }, + { + "epoch": 0.98, + "grad_norm": 1.665619734142658, + "learning_rate": 9.838957309762365e-09, + "loss": 0.4996, + "step": 13819 + }, + { + "epoch": 0.98, + "grad_norm": 1.8601877603949746, + "learning_rate": 9.767032484806482e-09, + "loss": 0.5017, + "step": 13820 + }, + { + "epoch": 0.98, + "grad_norm": 1.9462619207328242, + "learning_rate": 9.695371260864394e-09, + "loss": 0.5226, + "step": 13821 + }, + { + "epoch": 0.98, + "grad_norm": 1.5327915404748742, + "learning_rate": 9.623973641723072e-09, + "loss": 0.4599, + "step": 13822 + }, + { + "epoch": 0.98, + "grad_norm": 1.640183305775688, + "learning_rate": 9.552839631152277e-09, + "loss": 0.4661, + "step": 13823 + }, + { + "epoch": 0.98, + "grad_norm": 1.546458784191896, + "learning_rate": 9.481969232911226e-09, + "loss": 0.5599, + "step": 13824 + }, + { + "epoch": 0.98, + "grad_norm": 1.7515434366211393, + "learning_rate": 9.411362450742479e-09, + "loss": 0.4771, + "step": 13825 + }, + { + "epoch": 0.98, + "grad_norm": 1.6868595488614635, + "learning_rate": 9.34101928837583e-09, + "loss": 0.485, + "step": 13826 + }, + { + "epoch": 0.98, + "grad_norm": 2.033065196525684, + "learning_rate": 9.270939749527197e-09, + "loss": 0.5659, + "step": 13827 + }, + { + "epoch": 0.98, + "grad_norm": 1.7708692838640576, + "learning_rate": 9.201123837898063e-09, + "loss": 0.478, + "step": 13828 + }, + { + "epoch": 0.98, + "grad_norm": 3.1758656021872973, + "learning_rate": 9.131571557177144e-09, + "loss": 0.5817, + "step": 13829 + }, + { + "epoch": 0.98, + "grad_norm": 0.7473160868376661, + "learning_rate": 9.062282911038167e-09, + "loss": 0.444, + "step": 13830 + }, + { + "epoch": 0.98, + "grad_norm": 1.4330398745562019, + "learning_rate": 8.993257903140984e-09, + "loss": 0.4856, + "step": 13831 + }, + { + "epoch": 0.98, + "grad_norm": 1.6292561653557966, + "learning_rate": 8.924496537131566e-09, + "loss": 0.5085, + "step": 13832 + }, + { + "epoch": 0.98, + "grad_norm": 1.8082917433884704, + "learning_rate": 8.855998816642008e-09, + "loss": 0.4301, + "step": 13833 + }, + { + "epoch": 0.98, + "grad_norm": 2.002856632619029, + "learning_rate": 8.787764745291638e-09, + "loss": 0.4838, + "step": 13834 + }, + { + "epoch": 0.98, + "grad_norm": 0.6759347153878912, + "learning_rate": 8.719794326683128e-09, + "loss": 0.4177, + "step": 13835 + }, + { + "epoch": 0.98, + "grad_norm": 2.2721507454970955, + "learning_rate": 8.652087564408607e-09, + "loss": 0.4889, + "step": 13836 + }, + { + "epoch": 0.98, + "grad_norm": 1.5929412683874062, + "learning_rate": 8.584644462043545e-09, + "loss": 0.4582, + "step": 13837 + }, + { + "epoch": 0.98, + "grad_norm": 1.7897137741009035, + "learning_rate": 8.51746502315065e-09, + "loss": 0.581, + "step": 13838 + }, + { + "epoch": 0.98, + "grad_norm": 1.7996306555655746, + "learning_rate": 8.450549251279306e-09, + "loss": 0.5695, + "step": 13839 + }, + { + "epoch": 0.98, + "grad_norm": 1.5273339002060116, + "learning_rate": 8.383897149962794e-09, + "loss": 0.4999, + "step": 13840 + }, + { + "epoch": 0.98, + "grad_norm": 1.5750512914346113, + "learning_rate": 8.3175087227233e-09, + "loss": 0.4606, + "step": 13841 + }, + { + "epoch": 0.98, + "grad_norm": 1.7219578697266564, + "learning_rate": 8.251383973066907e-09, + "loss": 0.5405, + "step": 13842 + }, + { + "epoch": 0.98, + "grad_norm": 2.1452607296480357, + "learning_rate": 8.185522904486932e-09, + "loss": 0.4852, + "step": 13843 + }, + { + "epoch": 0.98, + "grad_norm": 1.8451043425648626, + "learning_rate": 8.119925520462257e-09, + "loss": 0.5452, + "step": 13844 + }, + { + "epoch": 0.98, + "grad_norm": 1.6680890690164312, + "learning_rate": 8.054591824457892e-09, + "loss": 0.5474, + "step": 13845 + }, + { + "epoch": 0.98, + "grad_norm": 1.4726365788283373, + "learning_rate": 7.989521819924406e-09, + "loss": 0.4801, + "step": 13846 + }, + { + "epoch": 0.98, + "grad_norm": 1.6871755353559854, + "learning_rate": 7.924715510300162e-09, + "loss": 0.5784, + "step": 13847 + }, + { + "epoch": 0.98, + "grad_norm": 1.677132360115063, + "learning_rate": 7.860172899007978e-09, + "loss": 0.5126, + "step": 13848 + }, + { + "epoch": 0.98, + "grad_norm": 2.3084543466001453, + "learning_rate": 7.795893989456792e-09, + "loss": 0.5274, + "step": 13849 + }, + { + "epoch": 0.98, + "grad_norm": 1.6855330125619785, + "learning_rate": 7.731878785042778e-09, + "loss": 0.5167, + "step": 13850 + }, + { + "epoch": 0.98, + "grad_norm": 0.6613322886448719, + "learning_rate": 7.668127289147121e-09, + "loss": 0.4073, + "step": 13851 + }, + { + "epoch": 0.98, + "grad_norm": 1.917431623101421, + "learning_rate": 7.604639505136568e-09, + "loss": 0.4905, + "step": 13852 + }, + { + "epoch": 0.98, + "grad_norm": 1.855896296893096, + "learning_rate": 7.541415436366218e-09, + "loss": 0.5275, + "step": 13853 + }, + { + "epoch": 0.98, + "grad_norm": 1.6817356830093526, + "learning_rate": 7.478455086174507e-09, + "loss": 0.4919, + "step": 13854 + }, + { + "epoch": 0.98, + "grad_norm": 1.8443558426653477, + "learning_rate": 7.4157584578882226e-09, + "loss": 0.482, + "step": 13855 + }, + { + "epoch": 0.98, + "grad_norm": 1.9091181551897176, + "learning_rate": 7.353325554818603e-09, + "loss": 0.4888, + "step": 13856 + }, + { + "epoch": 0.98, + "grad_norm": 0.6500583303331111, + "learning_rate": 7.291156380264119e-09, + "loss": 0.4362, + "step": 13857 + }, + { + "epoch": 0.98, + "grad_norm": 1.835000603367823, + "learning_rate": 7.229250937507704e-09, + "loss": 0.5278, + "step": 13858 + }, + { + "epoch": 0.98, + "grad_norm": 1.6899015645233655, + "learning_rate": 7.167609229820627e-09, + "loss": 0.5693, + "step": 13859 + }, + { + "epoch": 0.98, + "grad_norm": 0.6499629607362998, + "learning_rate": 7.106231260458063e-09, + "loss": 0.4197, + "step": 13860 + }, + { + "epoch": 0.98, + "grad_norm": 2.4721431021543365, + "learning_rate": 7.045117032662419e-09, + "loss": 0.4928, + "step": 13861 + }, + { + "epoch": 0.98, + "grad_norm": 5.33074914229362, + "learning_rate": 6.984266549662777e-09, + "loss": 0.506, + "step": 13862 + }, + { + "epoch": 0.98, + "grad_norm": 1.9376528020824142, + "learning_rate": 6.923679814672124e-09, + "loss": 0.5119, + "step": 13863 + }, + { + "epoch": 0.98, + "grad_norm": 1.6870020684056182, + "learning_rate": 6.8633568308917875e-09, + "loss": 0.4815, + "step": 13864 + }, + { + "epoch": 0.98, + "grad_norm": 1.5241698549444123, + "learning_rate": 6.803297601508108e-09, + "loss": 0.5783, + "step": 13865 + }, + { + "epoch": 0.98, + "grad_norm": 1.6623960127782504, + "learning_rate": 6.7435021296935464e-09, + "loss": 0.4282, + "step": 13866 + }, + { + "epoch": 0.98, + "grad_norm": 2.280188502268781, + "learning_rate": 6.6839704186066885e-09, + "loss": 0.5094, + "step": 13867 + }, + { + "epoch": 0.98, + "grad_norm": 1.8100413830189652, + "learning_rate": 6.6247024713922415e-09, + "loss": 0.5275, + "step": 13868 + }, + { + "epoch": 0.98, + "grad_norm": 1.6687045949892414, + "learning_rate": 6.5656982911810326e-09, + "loss": 0.4763, + "step": 13869 + }, + { + "epoch": 0.98, + "grad_norm": 0.6463604907448521, + "learning_rate": 6.50695788108946e-09, + "loss": 0.4314, + "step": 13870 + }, + { + "epoch": 0.98, + "grad_norm": 2.2368923096007935, + "learning_rate": 6.4484812442205946e-09, + "loss": 0.6394, + "step": 13871 + }, + { + "epoch": 0.98, + "grad_norm": 2.217003591847839, + "learning_rate": 6.390268383663079e-09, + "loss": 0.508, + "step": 13872 + }, + { + "epoch": 0.98, + "grad_norm": 2.1030559816349905, + "learning_rate": 6.332319302492784e-09, + "loss": 0.4828, + "step": 13873 + }, + { + "epoch": 0.98, + "grad_norm": 2.708979611688709, + "learning_rate": 6.274634003770042e-09, + "loss": 0.5032, + "step": 13874 + }, + { + "epoch": 0.98, + "grad_norm": 1.5336497619764464, + "learning_rate": 6.2172124905418575e-09, + "loss": 0.5191, + "step": 13875 + }, + { + "epoch": 0.98, + "grad_norm": 1.6554497310084306, + "learning_rate": 6.160054765842471e-09, + "loss": 0.5257, + "step": 13876 + }, + { + "epoch": 0.98, + "grad_norm": 2.506916971894586, + "learning_rate": 6.103160832690025e-09, + "loss": 0.5493, + "step": 13877 + }, + { + "epoch": 0.98, + "grad_norm": 1.9838467567041875, + "learning_rate": 6.046530694090447e-09, + "loss": 0.5102, + "step": 13878 + }, + { + "epoch": 0.98, + "grad_norm": 2.342547441041773, + "learning_rate": 5.990164353034678e-09, + "loss": 0.5465, + "step": 13879 + }, + { + "epoch": 0.98, + "grad_norm": 1.7426838837865224, + "learning_rate": 5.9340618125003355e-09, + "loss": 0.51, + "step": 13880 + }, + { + "epoch": 0.99, + "grad_norm": 1.7736237171131768, + "learning_rate": 5.878223075451162e-09, + "loss": 0.4988, + "step": 13881 + }, + { + "epoch": 0.99, + "grad_norm": 1.5247088289740156, + "learning_rate": 5.822648144837018e-09, + "loss": 0.4897, + "step": 13882 + }, + { + "epoch": 0.99, + "grad_norm": 1.5616947752876682, + "learning_rate": 5.767337023592778e-09, + "loss": 0.4589, + "step": 13883 + }, + { + "epoch": 0.99, + "grad_norm": 1.5685325850546339, + "learning_rate": 5.712289714640551e-09, + "loss": 0.5455, + "step": 13884 + }, + { + "epoch": 0.99, + "grad_norm": 2.9541701217059058, + "learning_rate": 5.657506220888564e-09, + "loss": 0.461, + "step": 13885 + }, + { + "epoch": 0.99, + "grad_norm": 2.6722364587580145, + "learning_rate": 5.602986545229505e-09, + "loss": 0.4726, + "step": 13886 + }, + { + "epoch": 0.99, + "grad_norm": 1.7392507931891652, + "learning_rate": 5.5487306905444016e-09, + "loss": 0.5541, + "step": 13887 + }, + { + "epoch": 0.99, + "grad_norm": 0.652687566116949, + "learning_rate": 5.494738659699295e-09, + "loss": 0.4252, + "step": 13888 + }, + { + "epoch": 0.99, + "grad_norm": 1.8258839186846314, + "learning_rate": 5.441010455545237e-09, + "loss": 0.5279, + "step": 13889 + }, + { + "epoch": 0.99, + "grad_norm": 2.0291273625170705, + "learning_rate": 5.3875460809210685e-09, + "loss": 0.5974, + "step": 13890 + }, + { + "epoch": 0.99, + "grad_norm": 2.2429992029608097, + "learning_rate": 5.334345538650643e-09, + "loss": 0.4241, + "step": 13891 + }, + { + "epoch": 0.99, + "grad_norm": 1.672379074001293, + "learning_rate": 5.281408831544488e-09, + "loss": 0.5423, + "step": 13892 + }, + { + "epoch": 0.99, + "grad_norm": 1.5476875423475707, + "learning_rate": 5.2287359623987014e-09, + "loss": 0.4868, + "step": 13893 + }, + { + "epoch": 0.99, + "grad_norm": 1.6087314362824645, + "learning_rate": 5.176326933995501e-09, + "loss": 0.4923, + "step": 13894 + }, + { + "epoch": 0.99, + "grad_norm": 1.6457920747713095, + "learning_rate": 5.124181749103785e-09, + "loss": 0.4647, + "step": 13895 + }, + { + "epoch": 0.99, + "grad_norm": 2.071348628878063, + "learning_rate": 5.07230041047746e-09, + "loss": 0.5072, + "step": 13896 + }, + { + "epoch": 0.99, + "grad_norm": 1.5088108342926303, + "learning_rate": 5.020682920857667e-09, + "loss": 0.4582, + "step": 13897 + }, + { + "epoch": 0.99, + "grad_norm": 1.9823024993310459, + "learning_rate": 4.9693292829705585e-09, + "loss": 0.535, + "step": 13898 + }, + { + "epoch": 0.99, + "grad_norm": 4.984130235873916, + "learning_rate": 4.918239499528965e-09, + "loss": 0.5306, + "step": 13899 + }, + { + "epoch": 0.99, + "grad_norm": 1.6703747466500334, + "learning_rate": 4.867413573231283e-09, + "loss": 0.5435, + "step": 13900 + }, + { + "epoch": 0.99, + "grad_norm": 1.9087075778644362, + "learning_rate": 4.816851506763143e-09, + "loss": 0.5171, + "step": 13901 + }, + { + "epoch": 0.99, + "grad_norm": 1.5545800476244913, + "learning_rate": 4.76655330279463e-09, + "loss": 0.5583, + "step": 13902 }, { - "epoch": 0.98, - "grad_norm": 2.925396010634107, - "learning_rate": 8.097971731669263e-09, - "loss": 0.583, - "step": 9336 + "epoch": 0.99, + "grad_norm": 1.7249920327573967, + "learning_rate": 4.716518963983063e-09, + "loss": 0.52, + "step": 13903 }, { - "epoch": 0.98, - "grad_norm": 3.319061858545403, - "learning_rate": 8.001306195520309e-09, - "loss": 0.6931, - "step": 9337 + "epoch": 0.99, + "grad_norm": 1.5068479306904612, + "learning_rate": 4.6667484929713295e-09, + "loss": 0.5283, + "step": 13904 }, { - "epoch": 0.98, - "grad_norm": 2.2308896693198204, - "learning_rate": 7.90522061534904e-09, - "loss": 0.5912, - "step": 9338 + "epoch": 0.99, + "grad_norm": 1.6898277712666616, + "learning_rate": 4.617241892387881e-09, + "loss": 0.5209, + "step": 13905 }, { - "epoch": 0.98, - "grad_norm": 2.557196137943084, - "learning_rate": 7.809715002318751e-09, - "loss": 0.6286, - "step": 9339 + "epoch": 0.99, + "grad_norm": 2.8169903292066514, + "learning_rate": 4.567999164848957e-09, + "loss": 0.5395, + "step": 13906 }, { - "epoch": 0.98, - "grad_norm": 2.8420399596277703, - "learning_rate": 7.714789367524456e-09, - "loss": 0.5345, - "step": 9340 + "epoch": 0.99, + "grad_norm": 1.6725218175966918, + "learning_rate": 4.519020312955258e-09, + "loss": 0.5558, + "step": 13907 }, { - "epoch": 0.98, - "grad_norm": 2.9739842730635013, - "learning_rate": 7.620443721995107e-09, - "loss": 0.6429, - "step": 9341 + "epoch": 0.99, + "grad_norm": 2.2013663746533934, + "learning_rate": 4.470305339293601e-09, + "loss": 0.5089, + "step": 13908 }, { - "epoch": 0.98, - "grad_norm": 2.265940840873313, - "learning_rate": 7.52667807669083e-09, - "loss": 0.5545, - "step": 9342 + "epoch": 0.99, + "grad_norm": 1.7488237631330972, + "learning_rate": 4.421854246437485e-09, + "loss": 0.4673, + "step": 13909 }, { - "epoch": 0.98, - "grad_norm": 2.287899726527495, - "learning_rate": 7.43349244250513e-09, - "loss": 0.6421, - "step": 9343 + "epoch": 0.99, + "grad_norm": 2.1380639737179044, + "learning_rate": 4.373667036946527e-09, + "loss": 0.5407, + "step": 13910 }, { - "epoch": 0.98, - "grad_norm": 2.5574409267349916, - "learning_rate": 7.340886830264904e-09, - "loss": 0.6416, - "step": 9344 + "epoch": 0.99, + "grad_norm": 1.9801680822509038, + "learning_rate": 4.3257437133659156e-09, + "loss": 0.5244, + "step": 13911 }, { - "epoch": 0.98, - "grad_norm": 2.8843132936950244, - "learning_rate": 7.2488612507276564e-09, - "loss": 0.6486, - "step": 9345 + "epoch": 0.99, + "grad_norm": 1.696714752220602, + "learning_rate": 4.278084278227513e-09, + "loss": 0.5448, + "step": 13912 }, { - "epoch": 0.98, - "grad_norm": 3.5891319217005697, - "learning_rate": 7.157415714584836e-09, - "loss": 0.6042, - "step": 9346 + "epoch": 0.99, + "grad_norm": 1.8332027599914975, + "learning_rate": 4.230688734048194e-09, + "loss": 0.5161, + "step": 13913 }, { - "epoch": 0.98, - "grad_norm": 3.5665853233741407, - "learning_rate": 7.066550232461278e-09, - "loss": 0.5836, - "step": 9347 + "epoch": 0.99, + "grad_norm": 1.6788906232196072, + "learning_rate": 4.183557083331513e-09, + "loss": 0.4956, + "step": 13914 }, { - "epoch": 0.98, - "grad_norm": 2.156939647962542, - "learning_rate": 6.976264814912426e-09, - "loss": 0.6158, - "step": 9348 + "epoch": 0.99, + "grad_norm": 2.665141528603961, + "learning_rate": 4.136689328568255e-09, + "loss": 0.5346, + "step": 13915 }, { - "epoch": 0.98, - "grad_norm": 2.318404717092306, - "learning_rate": 6.886559472427667e-09, - "loss": 0.5961, - "step": 9349 + "epoch": 0.99, + "grad_norm": 2.06264423543389, + "learning_rate": 4.090085472232552e-09, + "loss": 0.4728, + "step": 13916 }, { - "epoch": 0.98, - "grad_norm": 0.9679186604158867, - "learning_rate": 6.797434215429222e-09, - "loss": 0.5583, - "step": 9350 + "epoch": 0.99, + "grad_norm": 1.7265083359904163, + "learning_rate": 4.043745516787434e-09, + "loss": 0.5551, + "step": 13917 }, { - "epoch": 0.98, - "grad_norm": 2.882857695190735, - "learning_rate": 6.708889054270473e-09, - "loss": 0.6257, - "step": 9351 + "epoch": 0.99, + "grad_norm": 3.7093988881454796, + "learning_rate": 3.997669464680387e-09, + "loss": 0.5144, + "step": 13918 }, { - "epoch": 0.98, - "grad_norm": 2.9855183026376255, - "learning_rate": 6.620923999239304e-09, - "loss": 0.5986, - "step": 9352 + "epoch": 0.99, + "grad_norm": 9.420997339837395, + "learning_rate": 3.95185731834502e-09, + "loss": 0.4495, + "step": 13919 }, { - "epoch": 0.98, - "grad_norm": 2.965482349734234, - "learning_rate": 6.533539060554761e-09, - "loss": 0.5749, - "step": 9353 + "epoch": 0.99, + "grad_norm": 2.1353348231993943, + "learning_rate": 3.90630908020162e-09, + "loss": 0.517, + "step": 13920 }, { - "epoch": 0.98, - "grad_norm": 2.4913808973827263, - "learning_rate": 6.446734248368725e-09, - "loss": 0.6426, - "step": 9354 + "epoch": 0.99, + "grad_norm": 1.8497608698838504, + "learning_rate": 3.86102475265604e-09, + "loss": 0.5592, + "step": 13921 }, { - "epoch": 0.98, - "grad_norm": 2.7932074858752802, - "learning_rate": 6.360509572765905e-09, - "loss": 0.5945, - "step": 9355 + "epoch": 0.99, + "grad_norm": 0.7697526741814533, + "learning_rate": 3.816004338100254e-09, + "loss": 0.4347, + "step": 13922 }, { - "epoch": 0.98, - "grad_norm": 2.200283514862744, - "learning_rate": 6.2748650437644e-09, - "loss": 0.6131, - "step": 9356 + "epoch": 0.99, + "grad_norm": 1.9174488541550874, + "learning_rate": 3.771247838912362e-09, + "loss": 0.5354, + "step": 13923 }, { - "epoch": 0.98, - "grad_norm": 4.443831408522394, - "learning_rate": 6.189800671314028e-09, - "loss": 0.6324, - "step": 9357 + "epoch": 0.99, + "grad_norm": 1.741001757147235, + "learning_rate": 3.726755257457137e-09, + "loss": 0.6207, + "step": 13924 }, { - "epoch": 0.98, - "grad_norm": 2.722697074400931, - "learning_rate": 6.10531646529633e-09, - "loss": 0.5599, - "step": 9358 + "epoch": 0.99, + "grad_norm": 2.20594735805295, + "learning_rate": 3.682526596083813e-09, + "loss": 0.5806, + "step": 13925 }, { - "epoch": 0.98, - "grad_norm": 2.8796778714377784, - "learning_rate": 6.021412435527341e-09, - "loss": 0.5826, - "step": 9359 + "epoch": 0.99, + "grad_norm": 2.6758119495167874, + "learning_rate": 3.6385618571294077e-09, + "loss": 0.5309, + "step": 13926 }, { - "epoch": 0.98, - "grad_norm": 2.1980873977664355, - "learning_rate": 5.938088591754265e-09, - "loss": 0.5682, - "step": 9360 + "epoch": 0.99, + "grad_norm": 1.7117890791853878, + "learning_rate": 3.5948610429165088e-09, + "loss": 0.581, + "step": 13927 }, { "epoch": 0.99, - "grad_norm": 2.506953681588526, - "learning_rate": 5.855344943658248e-09, - "loss": 0.6596, - "step": 9361 + "grad_norm": 1.7659112498408214, + "learning_rate": 3.5514241557532693e-09, + "loss": 0.502, + "step": 13928 }, { "epoch": 0.99, - "grad_norm": 2.4320110623826006, - "learning_rate": 5.773181500851044e-09, - "loss": 0.5329, - "step": 9362 + "grad_norm": 1.7278242861119757, + "learning_rate": 3.50825119793341e-09, + "loss": 0.5354, + "step": 13929 }, { "epoch": 0.99, - "grad_norm": 2.673093388240049, - "learning_rate": 5.691598272878907e-09, - "loss": 0.6882, - "step": 9363 + "grad_norm": 1.69051871516047, + "learning_rate": 3.4653421717384395e-09, + "loss": 0.5052, + "step": 13930 }, { "epoch": 0.99, - "grad_norm": 5.141915304940063, - "learning_rate": 5.610595269220364e-09, - "loss": 0.6534, - "step": 9364 + "grad_norm": 1.895585066326675, + "learning_rate": 3.422697079434878e-09, + "loss": 0.45, + "step": 13931 }, { "epoch": 0.99, - "grad_norm": 2.2742761378178487, - "learning_rate": 5.530172499285113e-09, - "loss": 0.5644, - "step": 9365 + "grad_norm": 5.990333237591098, + "learning_rate": 3.380315923275368e-09, + "loss": 0.5424, + "step": 13932 }, { "epoch": 0.99, - "grad_norm": 3.7078245584599014, - "learning_rate": 5.45032997241679e-09, - "loss": 0.6237, - "step": 9366 + "grad_norm": 1.4989741606332263, + "learning_rate": 3.338198705498119e-09, + "loss": 0.4121, + "step": 13933 }, { "epoch": 0.99, - "grad_norm": 2.1910604685165675, - "learning_rate": 5.371067697891308e-09, - "loss": 0.6524, - "step": 9367 + "grad_norm": 1.7664706485841073, + "learning_rate": 3.2963454283280184e-09, + "loss": 0.5436, + "step": 13934 }, { "epoch": 0.99, - "grad_norm": 2.263472973076441, - "learning_rate": 5.292385684917411e-09, - "loss": 0.5387, - "step": 9368 + "grad_norm": 1.7137145571715726, + "learning_rate": 3.2547560939760746e-09, + "loss": 0.4697, + "step": 13935 }, { "epoch": 0.99, - "grad_norm": 2.6018067358751327, - "learning_rate": 5.214283942635567e-09, - "loss": 0.5654, - "step": 9369 + "grad_norm": 1.5980731602798788, + "learning_rate": 3.21343070463942e-09, + "loss": 0.5161, + "step": 13936 }, { "epoch": 0.99, - "grad_norm": 3.20137961246332, - "learning_rate": 5.136762480120183e-09, - "loss": 0.5999, - "step": 9370 + "grad_norm": 1.8148072184784318, + "learning_rate": 3.1723692625007518e-09, + "loss": 0.4773, + "step": 13937 }, { "epoch": 0.99, - "grad_norm": 2.5099314930574357, - "learning_rate": 5.059821306376833e-09, - "loss": 0.5936, - "step": 9371 + "grad_norm": 0.7024169774295151, + "learning_rate": 3.1315717697294466e-09, + "loss": 0.402, + "step": 13938 }, { "epoch": 0.99, - "grad_norm": 2.70471458958764, - "learning_rate": 4.9834604303444774e-09, - "loss": 0.631, - "step": 9372 + "grad_norm": 1.7203502808674962, + "learning_rate": 3.091038228479892e-09, + "loss": 0.5037, + "step": 13939 }, { "epoch": 0.99, - "grad_norm": 3.1223721533213684, - "learning_rate": 4.907679860894355e-09, - "loss": 0.6049, - "step": 9373 + "grad_norm": 1.6770002337895986, + "learning_rate": 3.0507686408931536e-09, + "loss": 0.5337, + "step": 13940 }, { "epoch": 0.99, - "grad_norm": 2.5657179600814275, - "learning_rate": 4.832479606831086e-09, - "loss": 0.5107, - "step": 9374 + "grad_norm": 1.5845993816901094, + "learning_rate": 3.010763009097528e-09, + "loss": 0.5347, + "step": 13941 }, { "epoch": 0.99, - "grad_norm": 5.080680659842182, - "learning_rate": 4.757859676891019e-09, - "loss": 0.5991, - "step": 9375 + "grad_norm": 1.6534109644248134, + "learning_rate": 2.9710213352052154e-09, + "loss": 0.481, + "step": 13942 }, { "epoch": 0.99, - "grad_norm": 2.8969187425122196, - "learning_rate": 4.683820079742218e-09, - "loss": 0.5855, - "step": 9376 + "grad_norm": 1.9495968329948787, + "learning_rate": 2.931543621315647e-09, + "loss": 0.5444, + "step": 13943 }, { "epoch": 0.99, - "grad_norm": 2.407260977526988, - "learning_rate": 4.610360823987803e-09, - "loss": 0.652, - "step": 9377 + "grad_norm": 4.010129273437819, + "learning_rate": 2.8923298695143764e-09, + "loss": 0.5241, + "step": 13944 }, { "epoch": 0.99, - "grad_norm": 2.990129810345131, - "learning_rate": 4.5374819181615015e-09, - "loss": 0.5611, - "step": 9378 + "grad_norm": 1.788717360800131, + "learning_rate": 2.8533800818730805e-09, + "loss": 0.4817, + "step": 13945 }, { "epoch": 0.99, - "grad_norm": 3.437506450033328, - "learning_rate": 4.465183370729875e-09, - "loss": 0.5923, - "step": 9379 + "grad_norm": 1.551542736081716, + "learning_rate": 2.814694260448447e-09, + "loss": 0.4421, + "step": 13946 }, { "epoch": 0.99, - "grad_norm": 3.476756229416724, - "learning_rate": 4.393465190092316e-09, - "loss": 0.6029, - "step": 9380 + "grad_norm": 2.1062601593786265, + "learning_rate": 2.7762724072843972e-09, + "loss": 0.4576, + "step": 13947 }, { "epoch": 0.99, - "grad_norm": 2.580270764805957, - "learning_rate": 4.322327384581604e-09, - "loss": 0.6063, - "step": 9381 + "grad_norm": 0.7015696800274323, + "learning_rate": 2.738114524410973e-09, + "loss": 0.4453, + "step": 13948 }, { "epoch": 0.99, - "grad_norm": 2.062993744196928, - "learning_rate": 4.251769962461683e-09, - "loss": 0.6294, - "step": 9382 + "grad_norm": 1.965126031220617, + "learning_rate": 2.7002206138432296e-09, + "loss": 0.5834, + "step": 13949 }, { "epoch": 0.99, - "grad_norm": 3.1334282612359785, - "learning_rate": 4.181792931929885e-09, - "loss": 0.6693, - "step": 9383 + "grad_norm": 1.8977511939332101, + "learning_rate": 2.662590677582899e-09, + "loss": 0.5022, + "step": 13950 }, { "epoch": 0.99, - "grad_norm": 1.989553082617997, - "learning_rate": 4.1123963011158175e-09, - "loss": 0.5787, - "step": 9384 + "grad_norm": 1.5666078402058337, + "learning_rate": 2.6252247176172807e-09, + "loss": 0.4664, + "step": 13951 }, { "epoch": 0.99, - "grad_norm": 2.2591858616247964, - "learning_rate": 4.043580078081921e-09, - "loss": 0.5738, - "step": 9385 + "grad_norm": 2.4062609051236463, + "learning_rate": 2.5881227359214612e-09, + "loss": 0.4835, + "step": 13952 }, { "epoch": 0.99, - "grad_norm": 2.6256761692714603, - "learning_rate": 3.975344270823467e-09, - "loss": 0.5571, - "step": 9386 + "grad_norm": 2.4216141545247125, + "learning_rate": 2.551284734454429e-09, + "loss": 0.4991, + "step": 13953 }, { "epoch": 0.99, - "grad_norm": 2.9188998285720706, - "learning_rate": 3.9076888872668914e-09, - "loss": 0.6569, - "step": 9387 + "grad_norm": 1.71318315457788, + "learning_rate": 2.514710715162405e-09, + "loss": 0.5112, + "step": 13954 }, { "epoch": 0.99, - "grad_norm": 2.4406062966062727, - "learning_rate": 3.84061393527313e-09, - "loss": 0.5535, - "step": 9388 + "grad_norm": 1.8297195138566402, + "learning_rate": 2.4784006799766224e-09, + "loss": 0.5685, + "step": 13955 }, { "epoch": 0.99, - "grad_norm": 2.499796926984259, - "learning_rate": 3.774119422634282e-09, - "loss": 0.6678, - "step": 9389 + "grad_norm": 1.4968817884785155, + "learning_rate": 2.442354630816102e-09, + "loss": 0.4958, + "step": 13956 }, { "epoch": 0.99, - "grad_norm": 2.50651659058804, - "learning_rate": 3.7082053570758338e-09, - "loss": 0.6311, - "step": 9390 + "grad_norm": 1.60279085261076, + "learning_rate": 2.4065725695837647e-09, + "loss": 0.5448, + "step": 13957 }, { "epoch": 0.99, - "grad_norm": 2.9483823752948752, - "learning_rate": 3.6428717462549944e-09, - "loss": 0.5463, - "step": 9391 + "grad_norm": 1.6974740872588794, + "learning_rate": 2.3710544981708772e-09, + "loss": 0.4998, + "step": 13958 }, { "epoch": 0.99, - "grad_norm": 3.723110555766217, - "learning_rate": 3.578118597762914e-09, - "loss": 0.6034, - "step": 9392 + "grad_norm": 1.832489573617952, + "learning_rate": 2.3358004184531602e-09, + "loss": 0.5252, + "step": 13959 }, { "epoch": 0.99, - "grad_norm": 2.5286683838105293, - "learning_rate": 3.5139459191213533e-09, - "loss": 0.6347, - "step": 9393 + "grad_norm": 1.666269563885567, + "learning_rate": 2.300810332293013e-09, + "loss": 0.5564, + "step": 13960 }, { "epoch": 0.99, - "grad_norm": 3.4531428030517914, - "learning_rate": 3.4503537177860145e-09, - "loss": 0.6048, - "step": 9394 + "grad_norm": 1.6095229705332432, + "learning_rate": 2.266084241538402e-09, + "loss": 0.4968, + "step": 13961 }, { "epoch": 0.99, - "grad_norm": 3.04607450890029, - "learning_rate": 3.3873420011448778e-09, - "loss": 0.5767, - "step": 9395 + "grad_norm": 1.7083493239825653, + "learning_rate": 2.2316221480239706e-09, + "loss": 0.5291, + "step": 13962 }, { "epoch": 0.99, - "grad_norm": 2.4392946355168275, - "learning_rate": 3.324910776519308e-09, - "loss": 0.596, - "step": 9396 + "grad_norm": 2.1015288880355527, + "learning_rate": 2.1974240535699296e-09, + "loss": 0.5109, + "step": 13963 }, { "epoch": 0.99, - "grad_norm": 3.422625198663431, - "learning_rate": 3.263060051161282e-09, - "loss": 0.5568, - "step": 9397 + "grad_norm": 1.8501412192309126, + "learning_rate": 2.163489959982612e-09, + "loss": 0.5253, + "step": 13964 }, { "epoch": 0.99, - "grad_norm": 2.418789715138034, - "learning_rate": 3.2017898322567185e-09, - "loss": 0.6036, - "step": 9398 + "grad_norm": 1.5778749058636505, + "learning_rate": 2.1298198690550277e-09, + "loss": 0.5046, + "step": 13965 }, { "epoch": 0.99, - "grad_norm": 4.929592023564839, - "learning_rate": 3.1411001269238127e-09, - "loss": 0.6467, - "step": 9399 + "grad_norm": 1.819524662966565, + "learning_rate": 2.096413782565754e-09, + "loss": 0.5485, + "step": 13966 }, { "epoch": 0.99, - "grad_norm": 2.2696725064713115, - "learning_rate": 3.080990942213591e-09, - "loss": 0.5624, - "step": 9400 + "grad_norm": 2.2237160586068887, + "learning_rate": 2.06327170227838e-09, + "loss": 0.5106, + "step": 13967 }, { "epoch": 0.99, - "grad_norm": 2.887084023089943, - "learning_rate": 3.0214622851093555e-09, - "loss": 0.6042, - "step": 9401 + "grad_norm": 1.8314724983876818, + "learning_rate": 2.030393629944838e-09, + "loss": 0.5859, + "step": 13968 }, { "epoch": 0.99, - "grad_norm": 2.2400748396131984, - "learning_rate": 2.9625141625266863e-09, - "loss": 0.6244, - "step": 9402 + "grad_norm": 4.15877215545157, + "learning_rate": 1.9977795673009614e-09, + "loss": 0.5743, + "step": 13969 }, { "epoch": 0.99, - "grad_norm": 2.4355210959399383, - "learning_rate": 2.9041465813145486e-09, - "loss": 0.5651, - "step": 9403 + "grad_norm": 1.7485228537003954, + "learning_rate": 1.9654295160703716e-09, + "loss": 0.5464, + "step": 13970 }, { "epoch": 0.99, - "grad_norm": 2.590028657934874, - "learning_rate": 2.8463595482530747e-09, - "loss": 0.6597, - "step": 9404 + "grad_norm": 1.7011822626352269, + "learning_rate": 1.933343477961147e-09, + "loss": 0.547, + "step": 13971 }, { "epoch": 0.99, - "grad_norm": 4.014180884320301, - "learning_rate": 2.7891530700563387e-09, - "loss": 0.6173, - "step": 9405 + "grad_norm": 1.7645272523558688, + "learning_rate": 1.9015214546685978e-09, + "loss": 0.5819, + "step": 13972 }, { "epoch": 0.99, - "grad_norm": 3.4571650309323445, - "learning_rate": 2.73252715337069e-09, - "loss": 0.6504, - "step": 9406 + "grad_norm": 1.6697872382637506, + "learning_rate": 1.869963447873602e-09, + "loss": 0.5491, + "step": 13973 }, { "epoch": 0.99, - "grad_norm": 2.9475010958623757, - "learning_rate": 2.6764818047736453e-09, - "loss": 0.5891, - "step": 9407 + "grad_norm": 1.4313479062996486, + "learning_rate": 1.8386694592426035e-09, + "loss": 0.4906, + "step": 13974 }, { "epoch": 0.99, - "grad_norm": 3.3576520108967984, - "learning_rate": 2.6210170307777726e-09, - "loss": 0.6288, - "step": 9408 + "grad_norm": 0.6871352763517582, + "learning_rate": 1.8076394904298355e-09, + "loss": 0.4467, + "step": 13975 }, { "epoch": 0.99, - "grad_norm": 2.4213655278930717, - "learning_rate": 2.5661328378262516e-09, - "loss": 0.5432, - "step": 9409 + "grad_norm": 2.038175941409779, + "learning_rate": 1.7768735430734319e-09, + "loss": 0.5831, + "step": 13976 }, { "epoch": 0.99, - "grad_norm": 3.796645905475894, - "learning_rate": 2.5118292322950933e-09, - "loss": 0.5317, - "step": 9410 + "grad_norm": 1.982093277656842, + "learning_rate": 1.746371618798759e-09, + "loss": 0.5693, + "step": 13977 }, { "epoch": 0.99, - "grad_norm": 2.7845772144730496, - "learning_rate": 2.4581062204931395e-09, - "loss": 0.5843, - "step": 9411 + "grad_norm": 1.460749766586072, + "learning_rate": 1.7161337192173055e-09, + "loss": 0.4956, + "step": 13978 }, { "epoch": 0.99, - "grad_norm": 3.8648265905534904, - "learning_rate": 2.404963808662064e-09, - "loss": 0.6527, - "step": 9412 + "grad_norm": 1.8070354015241525, + "learning_rate": 1.6861598459261275e-09, + "loss": 0.4441, + "step": 13979 }, { "epoch": 0.99, - "grad_norm": 2.7630651447270407, - "learning_rate": 2.3524020029758175e-09, - "loss": 0.687, - "step": 9413 + "grad_norm": 1.6922967097137218, + "learning_rate": 1.6564500005084028e-09, + "loss": 0.5516, + "step": 13980 }, { "epoch": 0.99, - "grad_norm": 8.873063326108902, - "learning_rate": 2.3004208095406268e-09, - "loss": 0.6214, - "step": 9414 + "grad_norm": 1.7263917473992119, + "learning_rate": 1.6270041845339867e-09, + "loss": 0.4878, + "step": 13981 }, { "epoch": 0.99, - "grad_norm": 3.581099597260234, - "learning_rate": 2.249020234395549e-09, - "loss": 0.6127, - "step": 9415 + "grad_norm": 1.7660840459601508, + "learning_rate": 1.597822399557747e-09, + "loss": 0.5831, + "step": 13982 }, { "epoch": 0.99, - "grad_norm": 0.8980029935743684, - "learning_rate": 2.198200283512475e-09, - "loss": 0.5578, - "step": 9416 + "grad_norm": 2.095193440996543, + "learning_rate": 1.5689046471217827e-09, + "loss": 0.5569, + "step": 13983 }, { "epoch": 0.99, - "grad_norm": 2.843841317169783, - "learning_rate": 2.14796096279557e-09, - "loss": 0.6437, - "step": 9417 + "grad_norm": 1.7220821767688, + "learning_rate": 1.5402509287532063e-09, + "loss": 0.5406, + "step": 13984 }, { "epoch": 0.99, - "grad_norm": 2.669266532272996, - "learning_rate": 2.0983022780807217e-09, - "loss": 0.5516, - "step": 9418 + "grad_norm": 1.812011646755183, + "learning_rate": 1.5118612459652516e-09, + "loss": 0.5099, + "step": 13985 }, { "epoch": 0.99, - "grad_norm": 2.9826082921006214, - "learning_rate": 2.049224235138314e-09, - "loss": 0.6909, - "step": 9419 + "grad_norm": 1.6141820431343143, + "learning_rate": 1.4837356002583847e-09, + "loss": 0.4825, + "step": 13986 }, { "epoch": 0.99, - "grad_norm": 2.724117248063, - "learning_rate": 2.0007268396687873e-09, - "loss": 0.6637, - "step": 9420 + "grad_norm": 0.6708826398290908, + "learning_rate": 1.4558739931175292e-09, + "loss": 0.4345, + "step": 13987 }, { "epoch": 0.99, - "grad_norm": 2.8185604270090447, - "learning_rate": 1.9528100973070784e-09, - "loss": 0.5829, - "step": 9421 + "grad_norm": 0.7701728061624483, + "learning_rate": 1.4282764260148407e-09, + "loss": 0.4017, + "step": 13988 }, { "epoch": 0.99, - "grad_norm": 3.095495170435446, - "learning_rate": 1.9054740136204007e-09, - "loss": 0.5911, - "step": 9422 + "grad_norm": 1.772943082822712, + "learning_rate": 1.4009429004085973e-09, + "loss": 0.4635, + "step": 13989 }, { "epoch": 0.99, - "grad_norm": 3.3241775905169297, - "learning_rate": 1.858718594107689e-09, - "loss": 0.6762, - "step": 9423 + "grad_norm": 1.9465925656932277, + "learning_rate": 1.3738734177415335e-09, + "loss": 0.4903, + "step": 13990 }, { "epoch": 0.99, - "grad_norm": 2.856463977964485, - "learning_rate": 1.8125438442007093e-09, - "loss": 0.5795, - "step": 9424 + "grad_norm": 8.354655856995636, + "learning_rate": 1.3470679794441721e-09, + "loss": 0.4462, + "step": 13991 }, { "epoch": 0.99, - "grad_norm": 1.1071167853953825, - "learning_rate": 1.766949769264059e-09, - "loss": 0.5487, - "step": 9425 + "grad_norm": 1.4804488290996796, + "learning_rate": 1.3205265869326022e-09, + "loss": 0.5656, + "step": 13992 }, { "epoch": 0.99, - "grad_norm": 0.898970735503558, - "learning_rate": 1.7219363745946127e-09, - "loss": 0.5282, - "step": 9426 + "grad_norm": 1.6857169140227601, + "learning_rate": 1.2942492416090357e-09, + "loss": 0.4849, + "step": 13993 }, { "epoch": 0.99, - "grad_norm": 8.223703639655533, - "learning_rate": 1.6775036654226307e-09, - "loss": 0.5858, - "step": 9427 + "grad_norm": 1.5167864934891746, + "learning_rate": 1.2682359448606962e-09, + "loss": 0.5682, + "step": 13994 }, { "epoch": 0.99, - "grad_norm": 2.6009487272996834, - "learning_rate": 1.6336516469089846e-09, - "loss": 0.6202, - "step": 9428 + "grad_norm": 2.570461044097694, + "learning_rate": 1.2424866980620398e-09, + "loss": 0.5096, + "step": 13995 }, { "epoch": 0.99, - "grad_norm": 3.470594389195533, - "learning_rate": 1.5903803241490435e-09, - "loss": 0.611, - "step": 9429 + "grad_norm": 1.9851574796917155, + "learning_rate": 1.2170015025736448e-09, + "loss": 0.5452, + "step": 13996 }, { "epoch": 0.99, - "grad_norm": 2.4607686425198088, - "learning_rate": 1.5476897021698968e-09, - "loss": 0.6361, - "step": 9430 + "grad_norm": 1.8432076841773941, + "learning_rate": 1.1917803597411015e-09, + "loss": 0.4942, + "step": 13997 }, { "epoch": 0.99, - "grad_norm": 3.015171435256967, - "learning_rate": 1.5055797859309108e-09, - "loss": 0.6213, - "step": 9431 + "grad_norm": 1.8307737376337636, + "learning_rate": 1.1668232708972327e-09, + "loss": 0.55, + "step": 13998 }, { "epoch": 0.99, - "grad_norm": 2.3552305276446104, - "learning_rate": 1.4640505803248384e-09, - "loss": 0.5813, - "step": 9432 + "grad_norm": 1.626935979482044, + "learning_rate": 1.142130237360428e-09, + "loss": 0.5679, + "step": 13999 }, { "epoch": 0.99, - "grad_norm": 3.0756788209013686, - "learning_rate": 1.4231020901755988e-09, - "loss": 0.5959, - "step": 9433 + "grad_norm": 2.0746799200345234, + "learning_rate": 1.1177012604340897e-09, + "loss": 0.5214, + "step": 14000 }, { "epoch": 0.99, - "grad_norm": 2.8917362317695483, - "learning_rate": 1.3827343202410527e-09, - "loss": 0.6495, - "step": 9434 + "grad_norm": 1.5044714612318666, + "learning_rate": 1.093536341409407e-09, + "loss": 0.4765, + "step": 14001 }, { "epoch": 0.99, - "grad_norm": 2.4911357145002206, - "learning_rate": 1.342947275211337e-09, - "loss": 0.6533, - "step": 9435 + "grad_norm": 1.8191842593159564, + "learning_rate": 1.069635481563136e-09, + "loss": 0.4962, + "step": 14002 }, { "epoch": 0.99, - "grad_norm": 4.243456265519813, - "learning_rate": 1.3037409597077555e-09, - "loss": 0.5686, - "step": 9436 + "grad_norm": 2.3438972914403795, + "learning_rate": 1.0459986821570455e-09, + "loss": 0.5304, + "step": 14003 }, { "epoch": 0.99, - "grad_norm": 2.4091325818193186, - "learning_rate": 1.265115378286108e-09, - "loss": 0.5949, - "step": 9437 + "grad_norm": 2.079169482822846, + "learning_rate": 1.0226259444401365e-09, + "loss": 0.4888, + "step": 14004 }, { "epoch": 0.99, - "grad_norm": 2.6371286497158675, - "learning_rate": 1.2270705354333612e-09, - "loss": 0.6817, - "step": 9438 + "grad_norm": 2.0814805982754025, + "learning_rate": 9.995172696475318e-10, + "loss": 0.5042, + "step": 14005 }, { "epoch": 0.99, - "grad_norm": 2.8038456517621673, - "learning_rate": 1.1896064355698678e-09, - "loss": 0.6074, - "step": 9439 + "grad_norm": 1.5929471243458846, + "learning_rate": 9.766726589988118e-10, + "loss": 0.5144, + "step": 14006 }, { "epoch": 0.99, - "grad_norm": 2.2758737769300783, - "learning_rate": 1.152723083047702e-09, - "loss": 0.5709, - "step": 9440 + "grad_norm": 1.7345748920472417, + "learning_rate": 9.540921137013438e-10, + "loss": 0.5688, + "step": 14007 }, { "epoch": 0.99, - "grad_norm": 2.5994595563728633, - "learning_rate": 1.11642048215177e-09, - "loss": 0.6447, - "step": 9441 + "grad_norm": 1.6687922514862206, + "learning_rate": 9.31775634947507e-10, + "loss": 0.5232, + "step": 14008 }, { "epoch": 0.99, - "grad_norm": 2.5885198513951937, - "learning_rate": 1.0806986370998086e-09, - "loss": 0.6026, - "step": 9442 + "grad_norm": 1.7899400163578012, + "learning_rate": 9.09723223916914e-10, + "loss": 0.5053, + "step": 14009 }, { "epoch": 0.99, - "grad_norm": 2.56828110164626, - "learning_rate": 1.0455575520418315e-09, - "loss": 0.6416, - "step": 9443 + "grad_norm": 1.6676304753767186, + "learning_rate": 8.879348817736333e-10, + "loss": 0.5607, + "step": 14010 }, { "epoch": 0.99, - "grad_norm": 2.5864916103094, - "learning_rate": 1.0109972310606842e-09, - "loss": 0.5943, - "step": 9444 + "grad_norm": 1.5481319237365292, + "learning_rate": 8.664106096689662e-10, + "loss": 0.5251, + "step": 14011 }, { "epoch": 0.99, - "grad_norm": 3.1320803492939415, - "learning_rate": 9.770176781709329e-10, - "loss": 0.5664, - "step": 9445 + "grad_norm": 1.7660253000527921, + "learning_rate": 8.451504087403362e-10, + "loss": 0.4758, + "step": 14012 }, { "epoch": 0.99, - "grad_norm": 2.401636357615803, - "learning_rate": 9.436188973210858e-10, - "loss": 0.5257, - "step": 9446 + "grad_norm": 1.4472550218715898, + "learning_rate": 8.241542801096236e-10, + "loss": 0.4407, + "step": 14013 }, { "epoch": 0.99, - "grad_norm": 2.253966978691805, - "learning_rate": 9.108008923902623e-10, - "loss": 0.5868, - "step": 9447 + "grad_norm": 0.6484917153509575, + "learning_rate": 8.034222248870516e-10, + "loss": 0.3993, + "step": 14014 }, { "epoch": 0.99, - "grad_norm": 2.506566563109066, - "learning_rate": 8.785636671920783e-10, - "loss": 0.6921, - "step": 9448 + "grad_norm": 1.801084926177681, + "learning_rate": 7.829542441672999e-10, + "loss": 0.4762, + "step": 14015 }, { "epoch": 0.99, - "grad_norm": 2.107145184612608, - "learning_rate": 8.469072254713162e-10, - "loss": 0.5729, - "step": 9449 + "grad_norm": 1.5949156531313347, + "learning_rate": 7.627503390311707e-10, + "loss": 0.5015, + "step": 14016 }, { "epoch": 0.99, - "grad_norm": 0.906976575314038, - "learning_rate": 8.158315709055897e-10, - "loss": 0.4864, - "step": 9450 + "grad_norm": 1.9214136663714037, + "learning_rate": 7.428105105466987e-10, + "loss": 0.5853, + "step": 14017 }, { "epoch": 0.99, - "grad_norm": 1.135047161405744, - "learning_rate": 7.853367071053441e-10, - "loss": 0.5252, - "step": 9451 + "grad_norm": 2.11614414038328, + "learning_rate": 7.231347597669303e-10, + "loss": 0.5556, + "step": 14018 }, { "epoch": 0.99, - "grad_norm": 2.453216748538149, - "learning_rate": 7.554226376133012e-10, - "loss": 0.6918, - "step": 9452 + "grad_norm": 1.9136872693995517, + "learning_rate": 7.037230877304791e-10, + "loss": 0.483, + "step": 14019 }, { "epoch": 0.99, - "grad_norm": 3.3255864656684557, - "learning_rate": 7.26089365905569e-10, - "loss": 0.5933, - "step": 9453 + "grad_norm": 1.6428634765830783, + "learning_rate": 6.845754954637462e-10, + "loss": 0.5477, + "step": 14020 }, { "epoch": 0.99, - "grad_norm": 3.4761913980531283, - "learning_rate": 6.97336895388867e-10, - "loss": 0.5683, - "step": 9454 + "grad_norm": 1.603031871162165, + "learning_rate": 6.656919839775899e-10, + "loss": 0.4921, + "step": 14021 }, { - "epoch": 0.99, - "grad_norm": 2.9519019016205963, - "learning_rate": 6.691652294038564e-10, - "loss": 0.6096, - "step": 9455 + "epoch": 1.0, + "grad_norm": 0.7836345747264397, + "learning_rate": 6.470725542695455e-10, + "loss": 0.4219, + "step": 14022 }, { "epoch": 1.0, - "grad_norm": 3.0498036153708985, - "learning_rate": 6.415743712240296e-10, - "loss": 0.5917, - "step": 9456 + "grad_norm": 1.6719921562129527, + "learning_rate": 6.287172073232706e-10, + "loss": 0.5283, + "step": 14023 }, { "epoch": 1.0, - "grad_norm": 2.0448500807091055, - "learning_rate": 6.145643240540456e-10, - "loss": 0.5391, - "step": 9457 + "grad_norm": 1.9625170399960061, + "learning_rate": 6.106259441085449e-10, + "loss": 0.5028, + "step": 14024 }, { "epoch": 1.0, - "grad_norm": 2.6179190080516817, - "learning_rate": 5.881350910325046e-10, - "loss": 0.5956, - "step": 9458 + "grad_norm": 1.780084880351303, + "learning_rate": 5.927987655801603e-10, + "loss": 0.5443, + "step": 14025 }, { "epoch": 1.0, - "grad_norm": 2.3232050763453804, - "learning_rate": 5.622866752291734e-10, - "loss": 0.6394, - "step": 9459 + "grad_norm": 1.944805460023378, + "learning_rate": 5.752356726812514e-10, + "loss": 0.6018, + "step": 14026 }, { "epoch": 1.0, - "grad_norm": 2.7020619835422437, - "learning_rate": 5.370190796483155e-10, - "loss": 0.6787, - "step": 9460 + "grad_norm": 1.8659361151212914, + "learning_rate": 5.57936666338299e-10, + "loss": 0.557, + "step": 14027 }, { "epoch": 1.0, - "grad_norm": 2.2927217439792016, - "learning_rate": 5.123323072236952e-10, - "loss": 0.6097, - "step": 9461 + "grad_norm": 1.6311114993047828, + "learning_rate": 5.40901747465572e-10, + "loss": 0.5586, + "step": 14028 }, { "epoch": 1.0, - "grad_norm": 2.10178763835819, - "learning_rate": 4.88226360824684e-10, - "loss": 0.64, - "step": 9462 + "grad_norm": 1.5908724331481414, + "learning_rate": 5.241309169634612e-10, + "loss": 0.4566, + "step": 14029 }, { "epoch": 1.0, - "grad_norm": 2.5684498862280543, - "learning_rate": 4.647012432512643e-10, - "loss": 0.4799, - "step": 9463 + "grad_norm": 1.6432804514463457, + "learning_rate": 5.076241757168143e-10, + "loss": 0.517, + "step": 14030 }, { "epoch": 1.0, - "grad_norm": 2.2494270384655715, - "learning_rate": 4.417569572368052e-10, - "loss": 0.6441, - "step": 9464 + "grad_norm": 2.113234072828806, + "learning_rate": 4.913815245977117e-10, + "loss": 0.547, + "step": 14031 }, { "epoch": 1.0, - "grad_norm": 2.678614389562663, - "learning_rate": 4.1939350544695224e-10, - "loss": 0.5611, - "step": 9465 + "grad_norm": 2.083481131375769, + "learning_rate": 4.754029644649105e-10, + "loss": 0.5233, + "step": 14032 }, { "epoch": 1.0, - "grad_norm": 2.949351619951574, - "learning_rate": 3.9761089047907206e-10, - "loss": 0.5365, - "step": 9466 + "grad_norm": 1.5728150560074747, + "learning_rate": 4.596884961621806e-10, + "loss": 0.5207, + "step": 14033 }, { "epoch": 1.0, - "grad_norm": 2.470566883786651, - "learning_rate": 3.764091148650284e-10, - "loss": 0.5893, - "step": 9467 + "grad_norm": 1.5666707792536556, + "learning_rate": 4.442381205188584e-10, + "loss": 0.542, + "step": 14034 }, { "epoch": 1.0, - "grad_norm": 2.925739305236972, - "learning_rate": 3.5578818106674073e-10, - "loss": 0.4812, - "step": 9468 + "grad_norm": 2.391347631703284, + "learning_rate": 4.290518383520681e-10, + "loss": 0.4411, + "step": 14035 }, { "epoch": 1.0, - "grad_norm": 2.416620880372823, - "learning_rate": 3.3574809148062546e-10, - "loss": 0.5753, - "step": 9469 + "grad_norm": 3.8580915530930837, + "learning_rate": 4.1412965046394584e-10, + "loss": 0.5392, + "step": 14036 }, { "epoch": 1.0, - "grad_norm": 2.779623019239646, - "learning_rate": 3.1628884843537546e-10, - "loss": 0.6176, - "step": 9470 + "grad_norm": 0.6764842922152625, + "learning_rate": 3.994715576421948e-10, + "loss": 0.4546, + "step": 14037 }, { "epoch": 1.0, - "grad_norm": 2.564403061347666, - "learning_rate": 2.974104541902945e-10, - "loss": 0.539, - "step": 9471 + "grad_norm": 1.829444286521338, + "learning_rate": 3.850775606611956e-10, + "loss": 0.5479, + "step": 14038 }, { "epoch": 1.0, - "grad_norm": 2.6305758356659785, - "learning_rate": 2.7911291093973835e-10, - "loss": 0.6094, - "step": 9472 + "grad_norm": 1.600367140098887, + "learning_rate": 3.70947660281451e-10, + "loss": 0.4736, + "step": 14039 }, { "epoch": 1.0, - "grad_norm": 2.6682590701908198, - "learning_rate": 2.61396220808674e-10, - "loss": 0.6554, - "step": 9473 + "grad_norm": 1.6036436643465142, + "learning_rate": 3.570818572490309e-10, + "loss": 0.4995, + "step": 14040 }, { "epoch": 1.0, - "grad_norm": 2.690023661401972, - "learning_rate": 2.4426038585656507e-10, - "loss": 0.5741, - "step": 9474 + "grad_norm": 1.6785783624893744, + "learning_rate": 3.434801522966824e-10, + "loss": 0.526, + "step": 14041 }, { "epoch": 1.0, - "grad_norm": 6.037525938734319, - "learning_rate": 2.277054080729313e-10, - "loss": 0.6665, - "step": 9475 + "grad_norm": 2.538593086652002, + "learning_rate": 3.30142546143275e-10, + "loss": 0.52, + "step": 14042 }, { "epoch": 1.0, - "grad_norm": 3.649606612868773, - "learning_rate": 2.117312893817891e-10, - "loss": 0.6428, - "step": 9476 + "grad_norm": 2.0055212667516527, + "learning_rate": 3.1706903949269006e-10, + "loss": 0.4835, + "step": 14043 }, { "epoch": 1.0, - "grad_norm": 2.1735139647394646, - "learning_rate": 1.9633803163887633e-10, - "loss": 0.5668, - "step": 9477 + "grad_norm": 1.5828870526811387, + "learning_rate": 3.0425963303604143e-10, + "loss": 0.4987, + "step": 14044 }, { "epoch": 1.0, - "grad_norm": 2.423065344674815, - "learning_rate": 1.8152563663220712e-10, - "loss": 0.5754, - "step": 9478 + "grad_norm": 1.6482968313308326, + "learning_rate": 2.9171432744945494e-10, + "loss": 0.4801, + "step": 14045 }, { "epoch": 1.0, - "grad_norm": 2.4791765768968714, - "learning_rate": 1.672941060826272e-10, - "loss": 0.6408, - "step": 9479 + "grad_norm": 1.6819540731450964, + "learning_rate": 2.794331233957337e-10, + "loss": 0.4334, + "step": 14046 }, { "epoch": 1.0, - "grad_norm": 2.3385528810246217, - "learning_rate": 1.5364344164436885e-10, - "loss": 0.557, - "step": 9480 + "grad_norm": 1.7548608087235462, + "learning_rate": 2.6741602152380307e-10, + "loss": 0.4917, + "step": 14047 }, { "epoch": 1.0, - "grad_norm": 2.724307971466079, - "learning_rate": 1.4057364490227542e-10, - "loss": 0.5809, - "step": 9481 + "grad_norm": 1.4693252108480692, + "learning_rate": 2.5566302246815556e-10, + "loss": 0.4757, + "step": 14048 }, { "epoch": 1.0, - "grad_norm": 2.66578465438902, - "learning_rate": 1.2808471737568717e-10, - "loss": 0.5427, - "step": 9482 + "grad_norm": 1.6482174010241917, + "learning_rate": 2.4417412684996087e-10, + "loss": 0.5317, + "step": 14049 }, { "epoch": 1.0, - "grad_norm": 2.2921319289193, - "learning_rate": 1.1617666051455534e-10, - "loss": 0.6357, - "step": 9483 + "grad_norm": 2.1973443884683697, + "learning_rate": 2.32949335275956e-10, + "loss": 0.5958, + "step": 14050 }, { "epoch": 1.0, - "grad_norm": 2.6535598474354454, - "learning_rate": 1.0484947570277293e-10, - "loss": 0.5376, - "step": 9484 + "grad_norm": 1.7171854302664875, + "learning_rate": 2.2198864833955503e-10, + "loss": 0.568, + "step": 14051 }, { "epoch": 1.0, - "grad_norm": 2.6190379885009096, - "learning_rate": 9.410316425706445e-11, - "loss": 0.6051, - "step": 9485 + "grad_norm": 1.6311906211724032, + "learning_rate": 2.1129206661862911e-10, + "loss": 0.4749, + "step": 14052 }, { "epoch": 1.0, - "grad_norm": 2.5315721986313395, - "learning_rate": 8.393772742421036e-11, - "loss": 0.6413, - "step": 9486 + "grad_norm": 1.7532043405981916, + "learning_rate": 2.0085959067939198e-10, + "loss": 0.5268, + "step": 14053 }, { "epoch": 1.0, - "grad_norm": 2.7299924264341318, - "learning_rate": 7.435316638715329e-11, - "loss": 0.5898, - "step": 9487 + "grad_norm": 1.5394554284054034, + "learning_rate": 1.9069122107195915e-10, + "loss": 0.5074, + "step": 14054 }, { "epoch": 1.0, - "grad_norm": 0.9023624234891299, - "learning_rate": 6.53494822577816e-11, - "loss": 0.4633, - "step": 9488 + "grad_norm": 1.7525939719136379, + "learning_rate": 1.8078695833423364e-10, + "loss": 0.5618, + "step": 14055 }, { "epoch": 1.0, - "grad_norm": 2.4516895016558786, - "learning_rate": 5.6926676083035593e-11, - "loss": 0.6467, - "step": 9489 + "grad_norm": 3.142876594191645, + "learning_rate": 1.7114680298857544e-10, + "loss": 0.4394, + "step": 14056 }, { "epoch": 1.0, - "grad_norm": 2.440327397988931, - "learning_rate": 4.908474884102177e-11, - "loss": 0.558, - "step": 9490 + "grad_norm": 1.8426101628565785, + "learning_rate": 1.61770755545132e-10, + "loss": 0.5479, + "step": 14057 }, { "epoch": 1.0, - "grad_norm": 3.9948194582261793, - "learning_rate": 4.1823701442678114e-11, - "loss": 0.6246, - "step": 9491 + "grad_norm": 1.958393493658163, + "learning_rate": 1.5265881649850767e-10, + "loss": 0.5332, + "step": 14058 }, { "epoch": 1.0, - "grad_norm": 2.751705490521763, - "learning_rate": 3.514353473232923e-11, - "loss": 0.5296, - "step": 9492 + "grad_norm": 1.5706728300954715, + "learning_rate": 1.4381098633053926e-10, + "loss": 0.5291, + "step": 14059 }, { "epoch": 1.0, - "grad_norm": 3.4070953890206592, - "learning_rate": 2.9044249485465914e-11, - "loss": 0.5484, - "step": 9493 + "grad_norm": 1.8135497633598439, + "learning_rate": 1.352272655075204e-10, + "loss": 0.5576, + "step": 14060 }, { "epoch": 1.0, - "grad_norm": 3.8452438404500078, - "learning_rate": 2.3525846410965557e-11, - "loss": 0.6063, - "step": 9494 + "grad_norm": 1.5976135754572138, + "learning_rate": 1.269076544846426e-10, + "loss": 0.5368, + "step": 14061 }, { "epoch": 1.0, - "grad_norm": 2.5585166775524413, - "learning_rate": 1.858832614942685e-11, - "loss": 0.568, - "step": 9495 + "grad_norm": 1.89458883691368, + "learning_rate": 1.188521536998888e-10, + "loss": 0.5673, + "step": 14062 }, { "epoch": 1.0, - "grad_norm": 3.292945274005528, - "learning_rate": 1.4231689274835093e-11, - "loss": 0.6767, - "step": 9496 + "grad_norm": 1.6235713373143799, + "learning_rate": 1.110607635790295e-10, + "loss": 0.5327, + "step": 14063 }, { "epoch": 1.0, - "grad_norm": 2.339794191169871, - "learning_rate": 1.0455936293451985e-11, - "loss": 0.5886, - "step": 9497 + "grad_norm": 1.7708324529793058, + "learning_rate": 1.035334845339575e-10, + "loss": 0.5186, + "step": 14064 }, { "epoch": 1.0, - "grad_norm": 2.5328995861443517, - "learning_rate": 7.261067643815622e-12, - "loss": 0.5278, - "step": 9498 + "grad_norm": 1.653001111268248, + "learning_rate": 9.627031696268773e-11, + "loss": 0.5313, + "step": 14065 }, { "epoch": 1.0, - "grad_norm": 2.1976850234192837, - "learning_rate": 4.647083696740495e-12, - "loss": 0.5848, - "step": 9499 + "grad_norm": 2.210710247897176, + "learning_rate": 8.927126124824714e-11, + "loss": 0.4716, + "step": 14066 }, { "epoch": 1.0, - "grad_norm": 2.9136231286554226, - "learning_rate": 2.613984756427712e-12, - "loss": 0.6258, - "step": 9500 + "grad_norm": 0.6571520616949684, + "learning_rate": 8.253631776033999e-11, + "loss": 0.4173, + "step": 14067 }, { "epoch": 1.0, - "grad_norm": 2.1228901797117725, - "learning_rate": 1.161771059354777e-12, - "loss": 0.6679, - "step": 9501 + "grad_norm": 1.6636133675495723, + "learning_rate": 7.606548685479276e-11, + "loss": 0.534, + "step": 14068 }, { "epoch": 1.0, - "grad_norm": 2.8882038893522686, - "learning_rate": 2.90442773165367e-13, - "loss": 0.5436, - "step": 9502 + "grad_norm": 1.5453879746003851, + "learning_rate": 6.985876887355413e-11, + "loss": 0.4572, + "step": 14069 + }, + { + "epoch": 1.0, + "grad_norm": 2.547377051744731, + "learning_rate": 6.391616414469504e-11, + "loss": 0.5853, + "step": 14070 + }, + { + "epoch": 1.0, + "grad_norm": 1.7580175269455187, + "learning_rate": 5.823767298185346e-11, + "loss": 0.5022, + "step": 14071 + }, + { + "epoch": 1.0, + "grad_norm": 1.7052096978189406, + "learning_rate": 5.282329568478961e-11, + "loss": 0.5011, + "step": 14072 + }, + { + "epoch": 1.0, + "grad_norm": 1.7261080841364274, + "learning_rate": 4.767303253994105e-11, + "loss": 0.5588, + "step": 14073 + }, + { + "epoch": 1.0, + "grad_norm": 0.6907828754992204, + "learning_rate": 4.278688381875728e-11, + "loss": 0.4066, + "step": 14074 + }, + { + "epoch": 1.0, + "grad_norm": 2.7724862846500646, + "learning_rate": 3.816484978047541e-11, + "loss": 0.5217, + "step": 14075 + }, + { + "epoch": 1.0, + "grad_norm": 0.6620951611880568, + "learning_rate": 3.380693066767915e-11, + "loss": 0.4241, + "step": 14076 + }, + { + "epoch": 1.0, + "grad_norm": 1.5700137204719178, + "learning_rate": 2.971312671185001e-11, + "loss": 0.4925, + "step": 14077 + }, + { + "epoch": 1.0, + "grad_norm": 1.6862164612767983, + "learning_rate": 2.5883438128926354e-11, + "loss": 0.5114, + "step": 14078 + }, + { + "epoch": 1.0, + "grad_norm": 1.5462663339023022, + "learning_rate": 2.2317865120968785e-11, + "loss": 0.5178, + "step": 14079 + }, + { + "epoch": 1.0, + "grad_norm": 1.7378250059874554, + "learning_rate": 1.9016407876160102e-11, + "loss": 0.4925, + "step": 14080 + }, + { + "epoch": 1.0, + "grad_norm": 0.691874180294237, + "learning_rate": 1.5979066569360435e-11, + "loss": 0.4342, + "step": 14081 + }, + { + "epoch": 1.0, + "grad_norm": 2.08339024644167, + "learning_rate": 1.3205841360441896e-11, + "loss": 0.4438, + "step": 14082 }, { "epoch": 1.0, - "grad_norm": 3.3614569935196172, + "grad_norm": 2.06953941839617, + "learning_rate": 1.0696732396509036e-11, + "loss": 0.5204, + "step": 14083 + }, + { + "epoch": 1.0, + "grad_norm": 1.6668875675980066, + "learning_rate": 8.451739810233505e-12, + "loss": 0.5328, + "step": 14084 + }, + { + "epoch": 1.0, + "grad_norm": 2.3720684453129035, + "learning_rate": 6.4708637192989475e-12, + "loss": 0.5081, + "step": 14085 + }, + { + "epoch": 1.0, + "grad_norm": 3.1296512908248424, + "learning_rate": 4.7541042291765485e-12, + "loss": 0.5259, + "step": 14086 + }, + { + "epoch": 1.0, + "grad_norm": 1.572778221995818, + "learning_rate": 3.301461430349484e-12, + "loss": 0.458, + "step": 14087 + }, + { + "epoch": 1.0, + "grad_norm": 1.8555207340843716, + "learning_rate": 2.1129353988680323e-12, + "loss": 0.4669, + "step": 14088 + }, + { + "epoch": 1.0, + "grad_norm": 1.7386741818807192, + "learning_rate": 1.188526198570017e-12, + "loss": 0.5212, + "step": 14089 + }, + { + "epoch": 1.0, + "grad_norm": 2.7436704099250657, + "learning_rate": 5.282338777501394e-13, + "loss": 0.5915, + "step": 14090 + }, + { + "epoch": 1.0, + "grad_norm": 1.9236723627453942, + "learning_rate": 1.3205847138042515e-13, + "loss": 0.525, + "step": 14091 + }, + { + "epoch": 1.0, + "grad_norm": 1.2388009305235081, "learning_rate": 0.0, - "loss": 0.5138, - "step": 9503 + "loss": 0.4138, + "step": 14092 }, { "epoch": 1.0, - "step": 9503, - "total_flos": 4.083480469386035e+16, - "train_loss": 0.6581587371017812, - "train_runtime": 215857.4487, - "train_samples_per_second": 8.453, - "train_steps_per_second": 0.044 + "step": 14092, + "total_flos": 4090201459113984.0, + "train_loss": 0.5389912225498484, + "train_runtime": 197525.1262, + "train_samples_per_second": 13.698, + "train_steps_per_second": 0.071 } ], "logging_steps": 1.0, - "max_steps": 9503, + "max_steps": 14092, "num_input_tokens_seen": 0, "num_train_epochs": 1, - "save_steps": 30000, - "total_flos": 4.083480469386035e+16, - "train_batch_size": 2, + "save_steps": 3000, + "total_flos": 4090201459113984.0, + "train_batch_size": 1, "trial_name": null, "trial_params": null }